//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
static cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
    cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  if (Subtarget.isISA3_0()) {
  } else {
    // No extending loads from f16 or HW conversions back and forth.
  }

  // PowerPC has pre-inc loads and stores.
  if (!Subtarget.hasSPE()) {
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)

    for (MVT VT : MVT::integer_valuetypes()) {
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).

  // We do not currently implement these libm ops for PowerPC.

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
  } else {
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE-specific operations.

  if (Subtarget.hasVSX())

  if (Subtarget.hasFSQRT()) {
  }

  if (Subtarget.hasFPRND()) {
  }

  // We don't support sin/cos/sqrt/fmod/pow.
  if (Subtarget.hasSPE()) {
  } else {
  }

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))

  if (Subtarget.hasFCPSGN()) {
  } else {
  }

  if (Subtarget.hasFPRND()) {
  }

  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
  // to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.hasP9Vector())
  else
  if (Subtarget.isISA3_0()) {
  } else {
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
  } else {
  }

  // PowerPC does not have ROTR.

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have SELECT.
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

    // PowerPC does not have [U|S]INT_TO_FP.
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    if (TM.Options.UnsafeFPMath) {
    }
  } else {
  }

  // We cannot sextinreg(i1). Expand to shifts.

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented, so please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.

  // TRAP is legal.

  // TRAMPOLINE is custom lowered.

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
  } else

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
  else

  // Use the default implementation.

  // We want to custom lower some of our intrinsics.

  // To handle counter-based loop conditions.

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
  }

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.

    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
    } else
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
    }
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // 64-bit PowerPC wants to expand i128 shifts itself.
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      } else {
      }

      if (Subtarget.hasVSX()) {
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      } else {
      }

      // Vector instructions introduced in P9.
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
      else

      // We promote all shuffles to v16i8.

      // We promote all non-typed operations to v4i32.

      // No other operations are legal.

      }
    }
    if (!Subtarget.hasP8Vector()) {
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.

    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);


    if (Subtarget.hasVSX()) {
    }

    if (Subtarget.hasP8Altivec())
    else

    // Altivec does not contain unordered floating-point compare instructions.

    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
      }

      // The nearbyint variants are not allowed to raise the inexact exception
      // so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
      }

      // Share the Altivec comparison restrictions.

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.

      } else {

        // VSX v2i64 only supports non-arithmetic operations.
      }

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.

      if (Subtarget.hasDirectMove())

      // Handle constrained floating-point operations of vector.
      // The predicate is `hasVSX` because Altivec instructions have no
      // exception support, but VSX vector instructions do.

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.

      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      // No extending loads to f128 on PPC.
      for (MVT FPT : MVT::fp_valuetypes())

      // No implementation for these ops for PowerPC.

      // Handle constrained floating-point operations of fp128.
    }

    if (Subtarget.hasP9Altivec()) {

    }
  }

  if (Subtarget.hasQPX()) {

    if (!Subtarget.useCRBits())

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    if (!Subtarget.useCRBits())

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    if (!Subtarget.useCRBits())

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    // These need to set FE_INEXACT, and so cannot be vectorized here.

    if (TM.Options.UnsafeFPMath) {
    } else {
    }

    // TODO: Handle constrained floating-point operations of v4f64.
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (!isPPC64) {
  }

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (!isPPC64)
    setMaxAtomicSizeInBitsSupported(32);

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:

  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())

  if (Subtarget.useCRBits()) {
  }

  if (Subtarget.hasP9Altivec()) {
  }

  setLibcallName(RTLIB::LOG_F128, "logf128");
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
  setLibcallName(RTLIB::EXP_F128, "expf128");
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
  setLibcallName(RTLIB::SIN_F128, "sinf128");
  setLibcallName(RTLIB::COS_F128, "cosf128");
  setLibcallName(RTLIB::POW_F128, "powf128");
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
  setLibcallName(RTLIB::POWI_F128, "__powikf2");
  setLibcallName(RTLIB::REM_F128, "fmodf128");

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
  }


  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:

    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are 8-byte on PPC64 and 4-byte on PPC32.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
  return Alignment.value();
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
/// type is cheaper than a multiply followed by a shift.
/// This is true for words and doublewords on 64-bit PowerPC.

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
  case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
    return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
    return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXSPLTI_SP_TO_DP:
    return "PPCISD::XXSPLTI_SP_TO_DP";
  case PPCISD::XXSPLTI32DX:
    return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8:  return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
  case PPCISD::ANDI_rec_1_EQ_BIT:
    return "PPCISD::ANDI_rec_1_EQ_BIT";
  case PPCISD::ANDI_rec_1_GT_BIT:
    return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD:           return "PPCISD::VABSD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
  case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
  case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
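/// As an illustrative example (not from the original source): on a big-endian
/// target with two different inputs (ShuffleKind 0), VPKUHUM keeps the
/// odd-numbered byte of each halfword, so the expected mask is
/// <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>.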
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
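/// For example (illustrative, not from the original source): on a big-endian
/// target with two different inputs (ShuffleKind 0), VPKUDUM keeps the low
/// word of each doubleword, so the expected mask is
/// <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>.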
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget =
      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
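/// For example (illustrative, not from the original source): a big-endian
/// vmrglw with two different inputs (ShuffleKind 0, UnitSize 4) expects the
/// byte mask <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.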
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16
 * byte-sized elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
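// For example (illustrative, not from the original source): with
// IndexOffset == 0 and RHSStartValue == 16 (two different inputs), the
// expected byte mask is <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>,
// which is a big-endian vmrgew.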
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
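/// For example (illustrative, not from the original source): on a big-endian
/// target with two different inputs (ShuffleKind 0), the mask
/// <3,4,5,...,17,18> selects 16 consecutive bytes starting at byte 3 of the
/// concatenated inputs, so this returns a shift amount of 3.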
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
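/// For example (illustrative, not from the original source): with
/// EltSize == 4, the mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> is a splat of
/// word element 1 of the first input.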
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
///            Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
///            the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
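/// For example (illustrative, not from the original source): with Width == 4
/// and StepLen == -1, the byte-reversing mask
/// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> passes this check.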
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; // Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}

static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
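/// For example (illustrative, not from the original source): with a single
/// (unary) input, the byte mask <8,9,...,15, 0,1,...,7> swaps the two
/// doublewords and yields DM == 2 with Swap == false on either endianness.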
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big-endian bias - namely
/// elements are counted from the left of the vector register).
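/// For example (illustrative, not from the original source): for the word
/// splat <4,5,6,7, 4,5,6,7, ...> (EltSize 4, mask element 0 of 4), this
/// returns 1 on big-endian and (16/4) - 1 - 1 == 2 on little-endian.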
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
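/// For example (illustrative, not from the original source): a v8i16
/// build_vector splatting the constant 5, queried with ByteSize == 2, returns
/// the target constant 5, suitable for "vspltish 5".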
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                  // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
    Value = CN->getZExtValue();
  else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the
/// shift amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
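/// For example (illustrative, not from the original source): -32768 and 32767
/// are representable (Imm is set accordingly), while 32768 is not.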
2401bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2402 if (!isa<ConstantSDNode>(N))
2403 return false;
2404
2405 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2406 if (N->getValueType(0) == MVT::i32)
2407 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2408 else
2409 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2410}
2411bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2412 return isIntS16Immediate(Op.getNode(), Imm);
2413}
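// Illustrative example: a ConstantSDNode holding 32767 truncates to
// Imm == 32767, round-trips through the comparison, and returns true; a
// node holding 32768 truncates to Imm == -32768, fails the round-trip
// check, and returns false.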
2414
2415
2416/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2417/// be represented as an indexed [r+r] operation.
2418 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2419 SDValue &Index,
2420 SelectionDAG &DAG) const {
2421 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2422 UI != E; ++UI) {
2423 if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2424 if (Memop->getMemoryVT() == MVT::f64) {
2425 Base = N.getOperand(0);
2426 Index = N.getOperand(1);
2427 return true;
2428 }
2429 }
2430 }
2431 return false;
2432}
2433
2434 /// SelectAddressRegReg - Given the specified address, check to see if it
2435/// can be represented as an indexed [r+r] operation. Returns false if it
2436/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2437/// non-zero and N can be represented by a base register plus a signed 16-bit
2438/// displacement, make a more precise judgement by checking (displacement % \p
2439/// EncodingAlignment).
2440 bool PPCTargetLowering::SelectAddressRegReg(
2441 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2442 MaybeAlign EncodingAlignment) const {
2443 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2444 // a [pc+imm].
2445 if (SelectAddressPCRel(N, Base))
2446 return false;
2447
2448 int16_t Imm = 0;
2449 if (N.getOpcode() == ISD::ADD) {
2450 // Is there any SPE load/store (f64) that can't handle a 16-bit offset?
2451 // SPE load/store can only handle 8-bit offsets.
2452 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2453 return true;
2454 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2455 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2456 return false; // r+i
2457 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2458 return false; // r+i
2459
2460 Base = N.getOperand(0);
2461 Index = N.getOperand(1);
2462 return true;
2463 } else if (N.getOpcode() == ISD::OR) {
2464 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2465 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2466 return false; // r+i can fold it if we can.
2467
2468 // If this is an or of disjoint bitfields, we can codegen this as an add
2469 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2470 // disjoint.
2471 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2472
2473 if (LHSKnown.Zero.getBoolValue()) {
2474 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2475 // If all of the bits are known zero on the LHS or RHS, the add won't
2476 // carry.
2477 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2478 Base = N.getOperand(0);
2479 Index = N.getOperand(1);
2480 return true;
2481 }
2482 }
2483 }
2484
2485 return false;
2486}
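// Illustrative example of the disjoint-OR case above (not from the original
// source): in (or (shl %x, 4), 3) the LHS has its low four bits known zero
// and the RHS is known zero in every other bit, so no bit position can
// carry; the or behaves like an add and can safely be selected as
// Base = (shl %x, 4), Index = 3.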
2487
2488// If we happen to be doing an i64 load or store into a stack slot that has
2489// less than a 4-byte alignment, then the frame-index elimination may need to
2490// use an indexed load or store instruction (because the offset may not be a
2491// multiple of 4). The extra register needed to hold the offset comes from the
2492// register scavenger, and it is possible that the scavenger will need to use
2493// an emergency spill slot. As a result, we need to make sure that a spill slot
2494// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2495// stack slot.
2496static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2497 // FIXME: This does not handle the LWA case.
2498 if (VT != MVT::i64)
2499 return;
2500
2501 // NOTE: We'll exclude negative FIs here, which come from argument
2502 // lowering, because there are no known test cases triggering this problem
2503 // using packed structures (or similar). We can remove this exclusion if
2504 // we find such a test case. The reason why this is so test-case driven is
2505 // because this entire 'fixup' is only to prevent crashes (from the
2506 // register scavenger) on not-really-valid inputs. For example, if we have:
2507 // %a = alloca i1
2508 // %b = bitcast i1* %a to i64*
2509 // store i64 %val, i64* %b
2510 // then the store should really be marked as 'align 1', but is not. If it
2511 // were marked as 'align 1' then the indexed form would have been
2512 // instruction-selected initially, and the problem this 'fixup' is preventing
2513 // won't happen regardless.
2514 if (FrameIdx < 0)
2515 return;
2516
2517 MachineFunction &MF = DAG.getMachineFunction();
2518 MachineFrameInfo &MFI = MF.getFrameInfo();
2519
2520 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2521 return;
2522
2523 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2524 FuncInfo->setHasNonRISpills();
2525}
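// Illustrative scenario: the IR in the comment above performs an i64 store
// through a pointer with only 1-byte alignment, so frame-index elimination
// may need an indexed store plus a scavenged offset register; setting
// HasNonRISpills up front keeps an emergency spill slot available for that.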
2526
2527/// Returns true if the address N can be represented by a base register plus
2528/// a signed 16-bit displacement [r+imm], and if it is not better
2529/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2530/// displacements that are multiples of that value.
2531 bool PPCTargetLowering::SelectAddressRegImm(
2532 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2533 MaybeAlign EncodingAlignment) const {
2534 // FIXME dl should come from parent load or store, not from address
2535 SDLoc dl(N);
2536
2537 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2538 // a [pc+imm].
2539 if (SelectAddressPCRel(N, Base))
2540 return false;
2541
2542 // If this can be more profitably realized as r+r, fail.
2543 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2544 return false;
2545
2546 if (N.getOpcode() == ISD::ADD) {
2547 int16_t imm = 0;
2548 if (isIntS16Immediate(N.getOperand(1), imm) &&
2549 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2550 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2551 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2552 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2553 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2554 } else {
2555 Base = N.getOperand(0);
2556 }
2557 return true; // [r+i]
2558 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2559 // Match LOAD (ADD (X, Lo(G))).
2560 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2561 && "Cannot handle constant offsets yet!");
2562 Disp = N.getOperand(1).getOperand(0); // The global address.
2563 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2564 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2565 Disp.getOpcode() == ISD::TargetConstantPool ||
2566 Disp.getOpcode() == ISD::TargetJumpTable);
2567 Base = N.getOperand(0);
2568 return true; // [&g+r]
2569 }
2570 } else if (N.getOpcode() == ISD::OR) {
2571 int16_t imm = 0;
2572 if (isIntS16Immediate(N.getOperand(1), imm) &&
2573 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2574 // If this is an or of disjoint bitfields, we can codegen this as an add
2575 // (for better address arithmetic) if the LHS and RHS of the OR are
2576 // provably disjoint.
2577 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2578
2579 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2580 // If all of the bits are known zero on the LHS or RHS, the add won't
2581 // carry.
2582 if (FrameIndexSDNode *FI =
2583 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2584 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2585 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2586 } else {
2587 Base = N.getOperand(0);
2588 }
2589 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2590 return true;
2591 }
2592 }
2593 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2594 // Loading from a constant address.
2595
2596 // If this address fits entirely in a 16-bit sext immediate field, codegen
2597 // this as "d, 0"
2598 int16_t Imm;
2599 if (isIntS16Immediate(CN, Imm) &&
2600 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2601 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2602 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2603 CN->getValueType(0));
2604 return true;
2605 }
2606
2607 // Handle 32-bit sext immediates with LIS + addr mode.
2608 if ((CN->getValueType(0) == MVT::i32 ||
2609 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2610 (!EncodingAlignment ||
2611 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2612 int Addr = (int)CN->getZExtValue();
2613
2614 // Otherwise, break this down into an LIS + disp.
2615 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2616
2617 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2618 MVT::i32);
2619 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2620 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2621 return true;
2622 }
2623 }
2624
2625 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2626 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2627 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2628 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2629 } else
2630 Base = N;
2631 return true; // [r+0]
2632}
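// Illustrative example of the LIS + disp breakdown above (values are
// hypothetical): for the constant address 0x12348000 the low 16 bits
// sign-extend to -32768, so Disp == -32768 and
// Base == (0x12348000 + 0x8000) >> 16 == 0x1235; lis materializes
// 0x12350000, and adding the -32768 displacement restores the address.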
2633
2634 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2635 /// represented as an indexed [r+r] operation.
2636 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2637 SDValue &Index,
2638 SelectionDAG &DAG) const {
2639 // Check to see if we can easily represent this as an [r+r] address. This
2640 // will fail if it thinks that the address is more profitably represented as
2641 // reg+imm, e.g. where imm = 0.
2642 if (SelectAddressRegReg(N, Base, Index, DAG))
2643 return true;
2644
2645 // If the address is the result of an add, we will utilize the fact that the
2646 // address calculation includes an implicit add. However, we can reduce
2647 // register pressure if we do not materialize a constant just for use as the
2648 // index register. We only get rid of the add if it is not an add of a
2649 // value and a 16-bit signed constant where both operands have a single use.
2650 int16_t imm = 0;
2651 if (N.getOpcode() == ISD::ADD &&
2652 (!isIntS16Immediate(N.getOperand(1), imm) ||
2653 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2654 Base = N.getOperand(0);
2655 Index = N.getOperand(1);
2656 return true;
2657 }
2658
2659 // Otherwise, do it the hard way, using R0 as the base register.
2660 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2661 N.getValueType());
2662 Index = N;
2663 return true;
2664}
2665
2666template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2667 Ty *PCRelCand = dyn_cast<Ty>(N);
2668 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2669}
2670
2671/// Returns true if this address is a PC Relative address.
2672 /// PC Relative addresses are either marked with the flag PPCII::MO_PCREL_FLAG
2673 /// or have the node opcode PPCISD::MAT_PCREL_ADDR.
2674 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2675 // This is a materialize PC Relative node. Always select this as PC Relative.
2676 Base = N;
2677 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2678 return true;
2679 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2680 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2681 isValidPCRelNode<JumpTableSDNode>(N) ||
2682 isValidPCRelNode<BlockAddressSDNode>(N))
2683 return true;
2684 return false;
2685}
2686
2687/// Returns true if we should use a direct load into vector instruction
2688/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2689static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2690
2691 // If there are any other uses other than scalar to vector, then we should
2692 // keep it as a scalar load -> direct move pattern to prevent multiple
2693 // loads.
2694 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2695 if (!LD)
2696 return false;
2697
2698 EVT MemVT = LD->getMemoryVT();
2699 if (!MemVT.isSimple())
2700 return false;
2701 switch(MemVT.getSimpleVT().SimpleTy) {
2702 case MVT::i64:
2703 break;
2704 case MVT::i32:
2705 if (!ST.hasP8Vector())
2706 return false;
2707 break;
2708 case MVT::i16:
2709 case MVT::i8:
2710 if (!ST.hasP9Vector())
2711 return false;
2712 break;
2713 default:
2714 return false;
2715 }
2716
2717 SDValue LoadedVal(N, 0);
2718 if (!LoadedVal.hasOneUse())
2719 return false;
2720
2721 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2722 UI != UE; ++UI)
2723 if (UI.getUse().get().getResNo() == 0 &&
2724 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2725 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2726 return false;
2727
2728 return true;
2729}
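// Illustrative example (instruction choice is an assumption, not from this
// file): an i32 load whose only use is a scalar_to_vector can be selected
// as a direct load into a vector register on a Power8+ subtarget (e.g. an
// lxsiwax-style instruction), avoiding the GPR load + direct-move pair this
// function guards against.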
2730
2731 /// getPreIndexedAddressParts - Returns true, and sets the base pointer,
2732 /// the offset pointer, and the addressing mode by reference, if the node's
2733 /// address can be legally represented as a pre-indexed load / store address.
2734 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2735 SDValue &Offset,
2736 ISD::MemIndexedMode &AM,
2737 SelectionDAG &DAG) const {
2738 if (DisablePPCPreinc) return false;
2739
2740 bool isLoad = true;
2741 SDValue Ptr;
2742 EVT VT;
2743 unsigned Alignment;
2744 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2745 Ptr = LD->getBasePtr();
2746 VT = LD->getMemoryVT();
2747 Alignment = LD->getAlignment();
2748 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2749 Ptr = ST->getBasePtr();
2750 VT = ST->getMemoryVT();
2751 Alignment = ST->getAlignment();
2752 isLoad = false;
2753 } else
2754 return false;
2755
2756 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2757 // instructions because we can fold these into a more efficient instruction
2758 // instead (such as LXSD).
2759 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2760 return false;
2761 }
2762
2763 // PowerPC doesn't have preinc load/store instructions for vectors (except
2764 // for QPX, which does have preinc r+r forms).
2765 if (VT.isVector()) {
2766 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
2767 return false;
2768 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
2769 AM = ISD::PRE_INC;
2770 return true;
2771 }
2772 }
2773
2774 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2775 // Common code will reject creating a pre-inc form if the base pointer
2776 // is a frame index, or if N is a store and the base pointer is either
2777 // the same as or a predecessor of the value being stored. Check for
2778 // those situations here, and try with swapped Base/Offset instead.
2779 bool Swap = false;
2780
2781 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2782 Swap = true;
2783 else if (!isLoad) {
2784 SDValue Val = cast<StoreSDNode>(N)->getValue();
2785 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2786 Swap = true;
2787 }
2788
2789 if (Swap)
2790 std::swap(Base, Offset);
2791
2792 AM = ISD::PRE_INC;
2793 return true;
2794 }
2795
2796 // LDU/STU can only handle immediates that are a multiple of 4.
2797 if (VT != MVT::i64) {
2798 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2799 return false;
2800 } else {
2801 // LDU/STU need an address with at least 4-byte alignment.
2802 if (Alignment < 4)
2803 return false;
2804
2805 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2806 return false;
2807 }
2808
2809 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2810 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2811 // sext i32 to i64 when addr mode is r+i.
2812 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2813 LD->getExtensionType() == ISD::SEXTLOAD &&
2814 isa<ConstantSDNode>(Offset))
2815 return false;
2816 }
2817
2818 AM = ISD::PRE_INC;
2819 return true;
2820}
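// Illustrative example (a sketch, not from the original source): an i64
// store to [r30 + 16] with 8-byte alignment satisfies the multiple-of-4
// rule above, so it can be selected as stdu rS, 16(r30), writing the
// incremented address back into r30 as the ISD::PRE_INC result.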
2821
2822//===----------------------------------------------------------------------===//
2823// LowerOperation implementation
2824//===----------------------------------------------------------------------===//
2825
2826 /// Set HiOpFlags and LoOpFlags to the target MO flags to use when
2827 /// referencing labels, taking the PIC relocation model into account.
2828static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2829 unsigned &HiOpFlags, unsigned &LoOpFlags,
2830 const GlobalValue *GV = nullptr) {
2831 HiOpFlags = PPCII::MO_HA;
2832 LoOpFlags = PPCII::MO_LO;
2833 
2834 // Don't use the pic base if not in PIC relocation model.
2835 if (IsPIC) {
2836 HiOpFlags |= PPCII::MO_PIC_FLAG;
2837 LoOpFlags |= PPCII::MO_PIC_FLAG;
2838 }
2839}
2840
2841 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2842 SelectionDAG &DAG) {
2843 SDLoc DL(HiPart);
2844 EVT PtrVT = HiPart.getValueType();
2845 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2846
2847 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2848 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2849
2850 // With PIC, the first instruction is actually "GR+hi(&G)".
2851 if (isPIC)
2852 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2853 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2854 
2855 // Generate non-pic code that has direct accesses to the constant pool.
2856 // The address of the global is just (hi(&g)+lo(&g)).
2857 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2858}
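// Illustrative expansion of the Hi/Lo pair built above, non-PIC case
// (register and symbol names are hypothetical):
//   lis  r3, sym@ha
//   addi r3, r3, sym@l
// The @ha relocation compensates for the sign of the low 16 bits, so the
// two halves always add back to the full address.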
2859
2860 static void setUsesTOCBasePtr(MachineFunction &MF) {
2861 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2862 FuncInfo->setUsesTOCBasePtr();
2863}
2864
2865 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2866 setUsesTOCBasePtr(DAG.getMachineFunction());
2867 }
2868 
2869SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2870 SDValue GA) const {
2871 const bool Is64Bit = Subtarget.isPPC64();
2872 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2873 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2874 : Subtarget.isAIXABI()
2875 ? DAG.getRegister(PPC::R2, VT)
2876 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2877 SDValue Ops[] = { GA, Reg };
2878 return DAG.getMemIntrinsicNode(
2879 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2880 MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2881 MachineMemOperand::MOLoad);
2882 }
2883
2884SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2885 SelectionDAG &DAG) const {
2886 EVT PtrVT = Op.getValueType();
2887 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2888 const Constant *C = CP->getConstVal();
2889
2890 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2891 // The actual address of the GlobalValue is stored in the TOC.
2892 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2893 if (Subtarget.isUsingPCRelativeCalls()) {
2894 SDLoc DL(CP);
2895 EVT Ty = getPointerTy(DAG.getDataLayout());
2896 SDValue ConstPool = DAG.getTargetConstantPool(
2897 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2898 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2899 }
2900 setUsesTOCBasePtr(DAG);
2901 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2902 return getTOCEntry(DAG, SDLoc(CP), GA);
2903 }
2904
2905 unsigned MOHiFlag, MOLoFlag;
2906 bool IsPIC = isPositionIndependent();
2907 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2908
2909 if (IsPIC && Subtarget.isSVR4ABI()) {
2910 SDValue GA =
2911 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, PPCII::MO_PIC_FLAG);
2912 return getTOCEntry(DAG, SDLoc(CP), GA);
2913 }
2914
2915 SDValue CPIHi =
2916 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2917 SDValue CPILo =
2918 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2919 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2920}
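// Illustrative expansion of the TOC-indirect form chosen above for 64-bit
// ELF (label names are hypothetical):
//   addis r3, r2, .LC0@toc@ha
//   ld    r3, .LC0@toc@l(r3)
// where r2 is the TOC pointer and .LC0 is the TOC entry for the constant.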
2921
2922// For 64-bit PowerPC, prefer the more compact relative encodings.
2923// This trades 32 bits per jump table entry for one or two instructions
2924// on the jump site.
2925 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2926 if (isJumpTableRelative())
2927 return MachineJumpTableInfo::EK_LabelDifference32;
2928 
2929 return TargetLowering::getJumpTableEncoding();
2930 }
2931 
2932 bool PPCTargetLowering::isJumpTableRelative() const {
2933 if (UseAbsoluteJumpTables)
2934 return false;
2935 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2936 return true;
2938}
2939
2940 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2941 SelectionDAG &DAG) const {
2942 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2943 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2944 
2945 switch (getTargetMachine().getCodeModel()) {
2946 case CodeModel::Small:
2947 case CodeModel::Medium:
2948 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2949 default:
2950 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2951 getPointerTy(DAG.getDataLayout()));
2952 }
2953}
2954
2955const MCExpr *
2956 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2957 unsigned JTI,
2958 MCContext &Ctx) const {
2959 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2960 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2961 
2962 switch (getTargetMachine().getCodeModel()) {
2963 case CodeModel::Small:
2964 case CodeModel::Medium:
2965 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2966 default:
2967 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2968 }
2969}
2970
2971SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2972 EVT PtrVT = Op.getValueType();
2973 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2974 
2975 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2976 if (Subtarget.isUsingPCRelativeCalls()) {
2977 SDLoc DL(JT);
2978 EVT Ty = getPointerTy(DAG.getDataLayout());
2979 SDValue GA =
2980 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
2981 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2982 return MatAddr;
2983 }
2984
2985 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2986 // The actual address of the GlobalValue is stored in the TOC.
2987 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2988 setUsesTOCBasePtr(DAG);
2989 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2990 return getTOCEntry(DAG, SDLoc(JT), GA);
2991 }
2992
2993 unsigned MOHiFlag, MOLoFlag;
2994 bool IsPIC = isPositionIndependent();
2995 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2996
2997 if (IsPIC && Subtarget.isSVR4ABI()) {
2998 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2999 PPCII::MO_PIC_FLAG);
3000 return getTOCEntry(DAG, SDLoc(GA), GA);
3001 }
3002
3003 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3004 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3005 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3006}
3007
3008SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3009 SelectionDAG &DAG) const {
3010 EVT PtrVT = Op.getValueType();
3011 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3012 const BlockAddress *BA = BASDN->getBlockAddress();
3013
3014 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3015 if (Subtarget.isUsingPCRelativeCalls()) {
3016 SDLoc DL(BASDN);
3017 EVT Ty = getPointerTy(DAG.getDataLayout());
3018 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3019 PPCII::MO_PCREL_FLAG);
3020 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3021 return MatAddr;
3022 }
3023
3024 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3025 // The actual BlockAddress is stored in the TOC.
3026 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3027 setUsesTOCBasePtr(DAG);
3028 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3029 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3030 }
3031
3032 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3033 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3034 return getTOCEntry(
3035 DAG, SDLoc(BASDN),
3036 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3037
3038 unsigned MOHiFlag, MOLoFlag;
3039 bool IsPIC = isPositionIndependent();
3040 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3041 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3042 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3043 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3044}
3045
3046SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3047 SelectionDAG &DAG) const {
3048 // FIXME: TLS addresses currently use medium model code sequences,
3049 // which is the most useful form. Eventually support for small and
3050 // large models could be added if users need it, at the cost of
3051 // additional complexity.
3052 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3053 if (DAG.getTarget().useEmulatedTLS())
3054 return LowerToTLSEmulatedModel(GA, DAG);
3055
3056 SDLoc dl(GA);
3057 const GlobalValue *GV = GA->getGlobal();
3058 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3059 bool is64bit = Subtarget.isPPC64();
3060 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3061 PICLevel::Level picLevel = M->getPICLevel();
3062
3063 const TargetMachine &TM = getTargetMachine();
3064 TLSModel::Model Model = TM.getTLSModel(GV);
3065
3066 if (Model == TLSModel::LocalExec) {
3067 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3068 PPCII::MO_TPREL_HA);
3069 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3070 PPCII::MO_TPREL_LO);
3071 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3072 : DAG.getRegister(PPC::R2, MVT::i32);
3073
3074 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3075 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3076 }
3077
3078 if (Model == TLSModel::InitialExec) {
3079 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3080 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3081 PPCII::MO_TLS);
3082 SDValue GOTPtr;
3083 if (is64bit) {
3084 setUsesTOCBasePtr(DAG);
3085 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3086 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
3087 PtrVT, GOTReg, TGA);
3088 } else {
3089 if (!TM.isPositionIndependent())
3090 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3091 else if (picLevel == PICLevel::SmallPIC)
3092 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3093 else
3094 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3095 }
3096 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
3097 PtrVT, TGA, GOTPtr);
3098 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3099 }
3100
3101 if (Model == TLSModel::GeneralDynamic) {
3102 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3103 SDValue GOTPtr;
3104 if (is64bit) {
3105 setUsesTOCBasePtr(DAG);
3106 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3107 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3108 GOTReg, TGA);
3109 } else {
3110 if (picLevel == PICLevel::SmallPIC)
3111 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3112 else
3113 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3114 }
3115 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3116 GOTPtr, TGA, TGA);
3117 }
3118
3119 if (Model == TLSModel::LocalDynamic) {
3120 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3121 SDValue GOTPtr;
3122 if (is64bit) {
3123 setUsesTOCBasePtr(DAG);
3124 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3125 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3126 GOTReg, TGA);
3127 } else {
3128 if (picLevel == PICLevel::SmallPIC)
3129 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3130 else
3131 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3132 }
3133 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3134 PtrVT, GOTPtr, TGA, TGA);
3135 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3136 PtrVT, TLSAddr, TGA);
3137 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3138 }
3139
3140 llvm_unreachable("Unknown TLS model!");
3141}
3142
3143SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3144 SelectionDAG &DAG) const {
3145 EVT PtrVT = Op.getValueType();
3146 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3147 SDLoc DL(GSDN);
3148 const GlobalValue *GV = GSDN->getGlobal();
3149
3150 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3151 // The actual address of the GlobalValue is stored in the TOC.
3152 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3153 if (Subtarget.isUsingPCRelativeCalls()) {
3154 EVT Ty = getPointerTy(DAG.getDataLayout());
3155 if (isAccessedAsGotIndirect(Op)) {
3156 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3157 PPCII::MO_PCREL_FLAG |
3158 PPCII::MO_GOT_FLAG);
3159 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3160 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3161 MachinePointerInfo());
3162 return Load;
3163 } else {
3164 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3165 PPCII::MO_PCREL_FLAG);
3166 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3167 }
3168 }
3169 setUsesTOCBasePtr(DAG);
3170 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3171 return getTOCEntry(DAG, DL, GA);
3172 }
3173
3174 unsigned MOHiFlag, MOLoFlag;
3175 bool IsPIC = isPositionIndependent();
3176 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3177
3178 if (IsPIC && Subtarget.isSVR4ABI()) {
3179 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3180 GSDN->getOffset(),
3181 PPCII::MO_PIC_FLAG);
3182 return getTOCEntry(DAG, DL, GA);
3183 }
3184
3185 SDValue GAHi =
3186 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3187 SDValue GALo =
3188 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3189
3190 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3191}
3192
3193SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3194 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3195 SDLoc dl(Op);
3196
3197 if (Op.getValueType() == MVT::v2i64) {
3198 // When the operands themselves are v2i64 values, we need to do something
3199 // special because VSX has no underlying comparison operations for these.
3200 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3201 // Equality can be handled by casting to the legal type for Altivec
3202 // comparisons, everything else needs to be expanded.
3203 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3204 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3205 DAG.getSetCC(dl, MVT::v4i32,
3206 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3207 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3208 CC));
3209 }
3210
3211 return SDValue();
3212 }
3213
3214 // We handle most of these in the usual way.
3215 return Op;
3216 }
3217
3218 // If we're comparing for equality to zero, expose the fact that this is
3219 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3220 // fold the new nodes.
3221 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3222 return V;
3223
3224 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3225 // Leave comparisons against 0 and -1 alone for now, since they're usually
3226 // optimized. FIXME: revisit this when we can custom lower all setcc
3227 // optimizations.
3228 if (C->isAllOnesValue() || C->isNullValue())
3229 return SDValue();
3230 }
3231
3232 // If we have an integer seteq/setne, turn it into a compare against zero
3233 // by xor'ing the rhs with the lhs, which is faster than setting a
3234 // condition register, reading it back out, and masking the correct bit. The
3235 // normal approach here uses sub to do this instead of xor. Using xor exposes
3236 // the result to other bit-twiddling opportunities.
3237 EVT LHSVT = Op.getOperand(0).getValueType();
3238 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3239 EVT VT = Op.getValueType();
3240 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3241 Op.getOperand(1));
3242 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3243 }
3244 return SDValue();
3245}
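// Illustrative example of the xor rewrite above: (seteq %a, %b) becomes
// (seteq (xor %a, %b), 0), which later combining can turn into the
// ctlz/srl pair mentioned earlier instead of setting, reading, and masking
// a condition register field.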
3246
3247SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3248 SDNode *Node = Op.getNode();
3249 EVT VT = Node->getValueType(0);
3250 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3251 SDValue InChain = Node->getOperand(0);
3252 SDValue VAListPtr = Node->getOperand(1);
3253 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3254 SDLoc dl(Node);
3255
3256 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3257
3258 // gpr_index
3259 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3260 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3261 InChain = GprIndex.getValue(1);
3262
3263 if (VT == MVT::i64) {
3264 // Check if GprIndex is even
3265 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3266 DAG.getConstant(1, dl, MVT::i32));
3267 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3268 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3269 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3270 DAG.getConstant(1, dl, MVT::i32));
3271 // Align GprIndex to be even if it isn't
3272 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3273 GprIndex);
3274 }
3275
3276 // fpr index is 1 byte after gpr
3277 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3278 DAG.getConstant(1, dl, MVT::i32));
3279
3280 // fpr
3281 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3282 FprPtr, MachinePointerInfo(SV), MVT::i8);
3283 InChain = FprIndex.getValue(1);
3284
3285 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3286 DAG.getConstant(8, dl, MVT::i32));
3287
3288 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3289 DAG.getConstant(4, dl, MVT::i32));
3290
3291 // areas
3292 SDValue OverflowArea =
3293 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3294 InChain = OverflowArea.getValue(1);
3295
3296 SDValue RegSaveArea =
3297 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3298 InChain = RegSaveArea.getValue(1);
3299
3300 // select overflow_area if index >= 8
3301 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3302 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3303
3304 // adjustment constant gpr_index * 4/8
3305 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3306 VT.isInteger() ? GprIndex : FprIndex,
3307 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3308 MVT::i32));
3309
3310 // OurReg = RegSaveArea + RegConstant
3311 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3312 RegConstant);
3313
3314 // Floating types are 32 bytes into RegSaveArea
3315 if (VT.isFloatingPoint())
3316 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3317 DAG.getConstant(32, dl, MVT::i32));
3318
3319 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3320 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3321 VT.isInteger() ? GprIndex : FprIndex,
3322 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3323 MVT::i32));
3324
3325 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3326 VT.isInteger() ? VAListPtr : FprPtr,
3327 MachinePointerInfo(SV), MVT::i8);
3328
3329 // determine if we should load from reg_save_area or overflow_area
3330 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3331 
3332 // increase overflow_area by 4/8 if gpr/fpr > 8
3333 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3334 DAG.getConstant(VT.isInteger() ? 4 : 8,
3335 dl, MVT::i32));
3336
3337 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3338 OverflowAreaPlusN);
3339 
3340 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3341 MachinePointerInfo(), MVT::i32);
3342 
3343 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3344}
3345
3346SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3347 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3348
3349 // We have to copy the entire va_list struct:
3350 // 2*sizeof(char) + 2-byte alignment + 2*sizeof(char*) = 12 bytes
3351 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3352 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3353 false, true, false, MachinePointerInfo(),
3354 MachinePointerInfo());
3355 }
3356
3357SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3358 SelectionDAG &DAG) const {
3359 if (Subtarget.isAIXABI())
3360 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3361
3362 return Op.getOperand(0);
3363}
3364
3365SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 if (Subtarget.isAIXABI())
3368 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3369
3370 SDValue Chain = Op.getOperand(0);
3371 SDValue Trmp = Op.getOperand(1); // trampoline
3372 SDValue FPtr = Op.getOperand(2); // nested function
3373 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3374 SDLoc dl(Op);
3375
3376 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3377 bool isPPC64 = (PtrVT == MVT::i64);
3378 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3379 
3380 TargetLowering::ArgListTy Args;
3381 TargetLowering::ArgListEntry Entry;
3382 
3383 Entry.Ty = IntPtrTy;
3384 Entry.Node = Trmp; Args.push_back(Entry);
3385
3386 // TrampSize == (isPPC64 ? 48 : 40);
3387 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3388 isPPC64 ? MVT::i64 : MVT::i32);
3389 Args.push_back(Entry);
3390
3391 Entry.Node = FPtr; Args.push_back(Entry);
3392 Entry.Node = Nest; Args.push_back(Entry);
3393
3394 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3395 TargetLowering::CallLoweringInfo CLI(DAG);
3396 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3397 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3398 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3399
3400 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3401 return CallResult.second;
3402}
3403
3404SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3405 MachineFunction &MF = DAG.getMachineFunction();
3406 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3407 EVT PtrVT = getPointerTy(MF.getDataLayout());
3408 
3409 SDLoc dl(Op);
3410
3411 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3412 // vastart just stores the address of the VarArgsFrameIndex slot into the
3413 // memory location argument.
3414 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3415 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3416 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3417 MachinePointerInfo(SV));
3418 }
3419
3420 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3421 // We suppose the given va_list is already allocated.
3422 //
3423 // typedef struct {
3424 // char gpr; /* index into the array of 8 GPRs
3425 // * stored in the register save area
3426 // * gpr=0 corresponds to r3,
3427 // * gpr=1 to r4, etc.
3428 // */
3429 // char fpr; /* index into the array of 8 FPRs
3430 // * stored in the register save area
3431 // * fpr=0 corresponds to f1,
3432 // * fpr=1 to f2, etc.
3433 // */
3434 // char *overflow_arg_area;
3435 // /* location on stack that holds
3436 // * the next overflow argument
3437 // */
3438 // char *reg_save_area;
3439 // /* where r3:r10 and f1:f8 (if saved)
3440 // * are stored
3441 // */
3442 // } va_list[1];
3443
3444 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3445 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3446 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3447 PtrVT);
3448 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3449 PtrVT);
3450
3451 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3452 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3453
3454 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3455 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3456 
3457 uint64_t FPROffset = 1;
3458 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3459
3460 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3461
3462 // Store first byte : number of int regs
3463 SDValue firstStore =
3464 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3465 MachinePointerInfo(SV), MVT::i8);
3466 uint64_t nextOffset = FPROffset;
3467 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3468 ConstFPROffset);
3469
3470 // Store second byte : number of float regs
3471 SDValue secondStore =
3472 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3473 MachinePointerInfo(SV, nextOffset), MVT::i8);
3474 nextOffset += StackOffset;
3475 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3476 
3477 // Store second word : arguments given on stack
3478 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3479 MachinePointerInfo(SV, nextOffset));
3480 nextOffset += FrameOffset;
3481 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3482 
3483 // Store third word : arguments given in registers
3484 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3485 MachinePointerInfo(SV, nextOffset));
3486 }
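// Illustrative layout written above for 32-bit SVR4 (PtrVT is i32, so
// FrameOffset == 4, StackOffset == 3, FPROffset == 1): byte 0 holds the
// gpr count, byte 1 the fpr count, bytes 4-7 overflow_arg_area, and
// bytes 8-11 reg_save_area, matching the va_list struct in the block
// comment above.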
3487
3488/// FPR - The set of FP registers that should be allocated for arguments
3489/// on Darwin and AIX.
3490static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3491 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3492 PPC::F11, PPC::F12, PPC::F13};
3493
3494/// QFPR - The set of QPX registers that should be allocated for arguments.
3495static const MCPhysReg QFPR[] = {
3496 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
3497 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
3498
3499/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3500/// the stack.
3501static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3502 unsigned PtrByteSize) {
3503 unsigned ArgSize = ArgVT.getStoreSize();
3504 if (Flags.isByVal())
3505 ArgSize = Flags.getByValSize();
3506
3507 // Round up to multiples of the pointer size, except for array members,
3508 // which are always packed.
3509 if (!Flags.isInConsecutiveRegs())
3510 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3511 
3512 return ArgSize;
3513}
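// Illustrative example: a 10-byte byval argument with PtrByteSize == 8
// occupies ((10 + 7)/8) * 8 == 16 bytes of parameter area, while the same
// 10 bytes inside an array member (isInConsecutiveRegs) stay packed at 10.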
3514
3515/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3516/// on the stack.
3517 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3518 ISD::ArgFlagsTy Flags,
3519 unsigned PtrByteSize) {
3520 Align Alignment(PtrByteSize);
3521
3522 // Altivec parameters are padded to a 16 byte boundary.
3523 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3524 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3525 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3526 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3527 Alignment = Align(16);
3528 // QPX vector types stored in double-precision are padded to a 32 byte
3529 // boundary.
3530 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
3531 Alignment = Align(32);
3532
3533 // ByVal parameters are aligned as requested.
3534 if (Flags.isByVal()) {
3535 auto BVAlign = Flags.getNonZeroByValAlign();
3536 if (BVAlign > PtrByteSize) {
3537 if (BVAlign.value() % PtrByteSize != 0)
3538 report_fatal_error(
3539 "ByVal alignment is not a multiple of the pointer size");
3540
3541 Alignment = BVAlign;
3542 }
3543 }
3544
3545 // Array members are always packed to their original alignment.
3546 if (Flags.isInConsecutiveRegs()) {
3547 // If the array member was split into multiple registers, the first
3548 // needs to be aligned to the size of the full type. (Except for
3549 // ppcf128, which is only aligned as its f64 components.)
3550 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3551 Alignment = Align(OrigVT.getStoreSize());
3552 else
3553 Alignment = Align(ArgVT.getStoreSize());
3554 }
3555
3556 return Alignment;
3557}
3558
3559/// CalculateStackSlotUsed - Return whether this argument will use its
3560/// stack slot (instead of being passed in registers). ArgOffset,
3561/// AvailableFPRs, and AvailableVRs must hold the current argument
3562/// position, and will be updated to account for this argument.
3563 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
3564 ISD::ArgFlagsTy Flags,
3565 unsigned PtrByteSize,
3566 unsigned LinkageSize,
3567 unsigned ParamAreaSize,
3568 unsigned &ArgOffset,
3569 unsigned &AvailableFPRs,
3570 unsigned &AvailableVRs, bool HasQPX) {
3571 bool UseMemory = false;
3572
3573 // Respect alignment of argument on the stack.
3574 Align Alignment =
3575 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3576 ArgOffset = alignTo(ArgOffset, Alignment);
3577 // If there's no space left in the argument save area, we must
3578 // use memory (this check also catches zero-sized arguments).
3579 if (ArgOffset >= LinkageSize + ParamAreaSize)
3580 UseMemory = true;
3581
3582 // Allocate argument on the stack.
3583 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3584 if (Flags.isInConsecutiveRegsLast())
3585 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3586 // If we overran the argument save area, we must use memory
3587 // (this check catches arguments passed partially in memory)
3588 if (ArgOffset > LinkageSize + ParamAreaSize)
3589 UseMemory = true;
3590
3591 // However, if the argument is actually passed in an FPR or a VR,
3592 // we don't use memory after all.
3593 if (!Flags.isByVal()) {
3594 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
3595 // QPX registers overlap with the scalar FP registers.
3596 (HasQPX && (ArgVT == MVT::v4f32 ||
3597 ArgVT == MVT::v4f64 ||
3598 ArgVT == MVT::v4i1)))
3599 if (AvailableFPRs > 0) {
3600 --AvailableFPRs;
3601 return false;
3602 }
3603 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3604 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3605 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3606 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3607 if (AvailableVRs > 0) {
3608 --AvailableVRs;
3609 return false;
3610 }
3611 }
3612
3613 return UseMemory;
3614}
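// Illustrative example (sizes assume the ELFv2 values): with
// LinkageSize == 32 and a 64-byte parameter area (eight doubleword GPR
// slots), a ninth i64 argument starts at offset 96, fails the space check,
// and reports UseMemory == true; an f64 argument at the same offset still
// returns false as long as an FPR remains available.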
3615
3616/// EnsureStackAlignment - Round stack frame size up from NumBytes to
3617/// ensure minimum alignment required for target.
3618 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3619 unsigned NumBytes) {
3620 return alignTo(NumBytes, Lowering->getStackAlign());
3621}
3622
3623SDValue PPCTargetLowering::LowerFormalArguments(
3624 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3625 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3626 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3627 if (Subtarget.isAIXABI())
3628 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3629 InVals);
3630 if (Subtarget.is64BitELFABI())
3631 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3632 InVals);
3633 if (Subtarget.is32BitELFABI())
3634 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3635 InVals);
3636
3637 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3638 InVals);
3639}
3640
3641SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3642 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3643 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3644 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3645
3646 // 32-bit SVR4 ABI Stack Frame Layout:
3647 // +-----------------------------------+
3648 // +--> | Back chain |
3649 // | +-----------------------------------+
3650 // | | Floating-point register save area |
3651 // | +-----------------------------------+
3652 // | | General register save area |
3653 // | +-----------------------------------+
3654 // | | CR save word |
3655 // | +-----------------------------------+
3656 // | | VRSAVE save word |
3657 // | +-----------------------------------+
3658 // | | Alignment padding |
3659 // | +-----------------------------------+
3660 // | | Vector register save area |
3661 // | +-----------------------------------+
3662 // | | Local variable space |
3663 // | +-----------------------------------+
3664 // | | Parameter list area |
3665 // | +-----------------------------------+
3666 // | | LR save word |
3667 // | +-----------------------------------+
3668 // SP--> +--- | Back chain |
3669 // +-----------------------------------+
3670 //
3671 // Specifications:
3672 // System V Application Binary Interface PowerPC Processor Supplement
3673 // AltiVec Technology Programming Interface Manual
3674
3675 MachineFunction &MF = DAG.getMachineFunction();
3676 MachineFrameInfo &MFI = MF.getFrameInfo();
3677 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3678 
3679 EVT PtrVT = getPointerTy(MF.getDataLayout());
3680 // Potential tail calls could cause overwriting of argument stack slots.
3681 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3682 (CallConv == CallingConv::Fast));
3683 const Align PtrAlign(4);
3684
3685 // Assign locations to all of the incoming arguments.
3686 SmallVector<CCValAssign, 16> ArgLocs;
3687 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3688 *DAG.getContext());
3689
3690 // Reserve space for the linkage area on the stack.
3691 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3692 CCInfo.AllocateStack(LinkageSize, PtrAlign);
3693 if (useSoftFloat())
3694 CCInfo.PreAnalyzeFormalArguments(Ins);
3695
3696 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3697 CCInfo.clearWasPPCF128();
3698
3699 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3700 CCValAssign &VA = ArgLocs[i];
3701
3702 // Arguments stored in registers.
3703 if (VA.isRegLoc()) {
3704 const TargetRegisterClass *RC;
3705 EVT ValVT = VA.getValVT();
3706
3707 switch (ValVT.getSimpleVT().SimpleTy) {
3708 default:
3709 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3710 case MVT::i1:
3711 case MVT::i32:
3712 RC = &PPC::GPRCRegClass;
3713 break;
3714 case MVT::f32:
3715 if (Subtarget.hasP8Vector())
3716 RC = &PPC::VSSRCRegClass;
3717 else if (Subtarget.hasSPE())
3718 RC = &PPC::GPRCRegClass;
3719 else
3720 RC = &PPC::F4RCRegClass;
3721 break;
3722 case MVT::f64:
3723 if (Subtarget.hasVSX())
3724 RC = &PPC::VSFRCRegClass;
3725 else if (Subtarget.hasSPE())
3726 // SPE passes doubles in GPR pairs.
3727 RC = &PPC::GPRCRegClass;
3728 else
3729 RC = &PPC::F8RCRegClass;
3730 break;
3731 case MVT::v16i8:
3732 case MVT::v8i16:
3733 case MVT::v4i32:
3734 RC = &PPC::VRRCRegClass;
3735 break;
3736 case MVT::v4f32:
3737 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
3738 break;
3739 case MVT::v2f64:
3740 case MVT::v2i64:
3741 RC = &PPC::VRRCRegClass;
3742 break;
3743 case MVT::v4f64:
3744 RC = &PPC::QFRCRegClass;
3745 break;
3746 case MVT::v4i1:
3747 RC = &PPC::QBRCRegClass;
3748 break;
3749 }
3750
3751 SDValue ArgValue;
3752 // Transform the arguments stored in physical registers into
3753 // virtual ones.
3754 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3755 assert(i + 1 < e && "No second half of double precision argument");
3756 unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3757 unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3758 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3759 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3760 if (!Subtarget.isLittleEndian())
3761 std::swap(ArgValueLo, ArgValueHi);
3762 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3763 ArgValueHi);
3764 } else {
3765 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3766 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3767 ValVT == MVT::i1 ? MVT::i32 : ValVT);
3768 if (ValVT == MVT::i1)
3769 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3770 }
3771
3772 InVals.push_back(ArgValue);
3773 } else {
3774 // Argument stored in memory.
3775 assert(VA.isMemLoc());
3776
3777 // Get the extended size of the argument type on the stack.
3778 unsigned ArgSize = VA.getLocVT().getStoreSize();
3779 // Get the actual size of the argument type
3780 unsigned ObjSize = VA.getValVT().getStoreSize();
3781 unsigned ArgOffset = VA.getLocMemOffset();
3782 // Stack objects in PPC32 are right justified.
3783 ArgOffset += ArgSize - ObjSize;
3784 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3785
3786 // Create load nodes to retrieve arguments from the stack.
3787 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3788 InVals.push_back(
3789 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3790 }
3791 }
3792
3793 // Assign locations to all of the incoming aggregate by value arguments.
3794 // Aggregates passed by value are stored in the local variable space of the
3795 // caller's stack frame, right above the parameter list area.
3796 SmallVector<CCValAssign, 16> ByValArgLocs;
3797 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3798 ByValArgLocs, *DAG.getContext());
3799
3800 // Reserve stack space for the allocations in CCInfo.
3801 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3802
3803 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3804
3805 // Area that is at least reserved in the caller of this function.
3806 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3807 MinReservedArea = std::max(MinReservedArea, LinkageSize);
3808
3809 // Set the size that is at least reserved in caller of this function. Tail
3810 // call optimized function's reserved stack space needs to be aligned so that
3811 // taking the difference between two stack areas will result in an aligned
3812 // stack.
3813 MinReservedArea =
3814 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3815 FuncInfo->setMinReservedArea(MinReservedArea);
3816
3817 
3818 SmallVector<SDValue, 8> MemOps;
3819 // If the function takes variable number of arguments, make a frame index for
3820 // the start of the first vararg value... for expansion of llvm.va_start.
3821 if (isVarArg) {
3822 static const MCPhysReg GPArgRegs[] = {
3823 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3824 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3825 };
3826 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3827
3828 static const MCPhysReg FPArgRegs[] = {
3829 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3830 PPC::F8
3831 };
3832 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3833 
3834 if (useSoftFloat() || hasSPE())
3835 NumFPArgRegs = 0;
3836
3837 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3838 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3839
3840 // Make room for NumGPArgRegs and NumFPArgRegs.
3841 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3842 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3843 
3844 FuncInfo->setVarArgsStackOffset(
3845 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3846 CCInfo.getNextStackOffset(), true));
3847
3848 FuncInfo->setVarArgsFrameIndex(
3849 MFI.CreateStackObject(Depth, Align(8), false));
3850 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3851 
3852 // The fixed integer arguments of a variadic function are stored to the
3853 // VarArgsFrameIndex on the stack so that they may be loaded by
3854 // dereferencing the result of va_next.
3855 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3856 // Get an existing live-in vreg, or add a new one.
3857 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3858 if (!VReg)
3859 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3860
3861 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3862 SDValue Store =
3863 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3864 MemOps.push_back(Store);
3865 // Increment the address by four for the next argument to store
3866 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3867 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3868 }
3869
3870 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3871 // is set.
3872 // The double arguments are stored to the VarArgsFrameIndex
3873 // on the stack.
3874 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3875 // Get an existing live-in vreg, or add a new one.
3876 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3877 if (!VReg)
3878 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3879
3880 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3881 SDValue Store =
3882 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3883 MemOps.push_back(Store);
3884 // Increment the address by eight for the next argument to store
3885 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3886 PtrVT);
3887 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3888 }
3889 }
3890
3891 if (!MemOps.empty())
3892 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3893
3894 return Chain;
3895}
3896
3897// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3898// value to MVT::i64 and then truncate to the correct register size.
3899SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3900 EVT ObjectVT, SelectionDAG &DAG,
3901 SDValue ArgVal,
3902 const SDLoc &dl) const {
3903 if (Flags.isSExt())
3904 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3905 DAG.getValueType(ObjectVT));
3906 else if (Flags.isZExt())
3907 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3908 DAG.getValueType(ObjectVT));
3909
3910 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3911}
3912
3913SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3914 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3915 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3916 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3917 // TODO: add description of PPC stack frame format, or at least some docs.
3918 //
3919 bool isELFv2ABI = Subtarget.isELFv2ABI();
3920 bool isLittleEndian = Subtarget.isLittleEndian();
3921 MachineFunction &MF = DAG.getMachineFunction();
3922 MachineFrameInfo &MFI = MF.getFrameInfo();
3923 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3924
3925 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3926 "fastcc not supported on varargs functions");
3927 
3928 EVT PtrVT = getPointerTy(MF.getDataLayout());
3929 // Potential tail calls could cause overwriting of argument stack slots.
3930 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3931 (CallConv == CallingConv::Fast));
3932 unsigned PtrByteSize = 8;
3933 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3934
3935 static const MCPhysReg GPR[] = {
3936 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3937 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3938 };
3939 static const MCPhysReg VR[] = {
3940 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3941 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3942 };
3943
3944 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3945 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3946 const unsigned Num_VR_Regs = array_lengthof(VR);
3947 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3948
3949 // Do a first pass over the arguments to determine whether the ABI
3950 // guarantees that our caller has allocated the parameter save area
3951 // on its stack frame. In the ELFv1 ABI, this is always the case;
3952 // in the ELFv2 ABI, it is true if this is a vararg function or if
3953 // any parameter is located in a stack slot.
3954
3955 bool HasParameterArea = !isELFv2ABI || isVarArg;
3956 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3957 unsigned NumBytes = LinkageSize;
3958 unsigned AvailableFPRs = Num_FPR_Regs;
3959 unsigned AvailableVRs = Num_VR_Regs;
3960 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3961 if (Ins[i].Flags.isNest())
3962 continue;
3963
3964 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3965 PtrByteSize, LinkageSize, ParamAreaSize,
3966 NumBytes, AvailableFPRs, AvailableVRs,
3967 Subtarget.hasQPX()))
3968 HasParameterArea = true;
3969 }
3970
3971 // Add DAG nodes to load the arguments or copy them out of registers. On
3972 // entry to a function on PPC, the arguments start after the linkage area,
3973 // although the first ones are often in registers.
3974
3975 unsigned ArgOffset = LinkageSize;
3976 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3977 unsigned &QFPR_idx = FPR_idx;
3978 SmallVector<SDValue, 8> MemOps;
3979 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3980 unsigned CurArgIdx = 0;
3981 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3982 SDValue ArgVal;
3983 bool needsLoad = false;
3984 EVT ObjectVT = Ins[ArgNo].VT;
3985 EVT OrigVT = Ins[ArgNo].ArgVT;
3986 unsigned ObjSize = ObjectVT.getStoreSize();
3987 unsigned ArgSize = ObjSize;
3988 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3989 if (Ins[ArgNo].isOrigArg()) {
3990 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3991 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3992 }
3993 // We re-align the argument offset for each argument, except when using the
3994 // fast calling convention, where we must make sure we only do so when we
3995 // will actually use a stack slot.
3996 unsigned CurArgOffset;
3997 Align Alignment;
3998 auto ComputeArgOffset = [&]() {
3999 /* Respect alignment of argument on the stack. */
4000 Alignment =
4001 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4002 ArgOffset = alignTo(ArgOffset, Alignment);
4003 CurArgOffset = ArgOffset;
4004 };
4005
4006 if (CallConv != CallingConv::Fast) {
4007 ComputeArgOffset();
4008 
4009 /* Compute GPR index associated with argument offset. */
4010 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4011 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4012 }
4013
4014 // FIXME the codegen can be much improved in some cases.
4015 // We do not have to keep everything in memory.
4016 if (Flags.isByVal()) {
4017 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4018
4019 if (CallConv == CallingConv::Fast)
4020 ComputeArgOffset();
4021 
4022 // ObjSize is the true size; ArgSize is rounded up to a multiple of registers.
4023 ObjSize = Flags.getByValSize();
4024 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4025 // Empty aggregate parameters do not take up registers. Examples:
4026 // struct { } a;
4027 // union { } b;
4028 // int c[0];
4029 // etc. However, we have to provide a place-holder in InVals, so
4030 // pretend we have an 8-byte item at the current address for that
4031 // purpose.
4032 if (!ObjSize) {
4033 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4034 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4035 InVals.push_back(FIN);
4036 continue;
4037 }
4038
4039 // Create a stack object covering all stack doublewords occupied
4040 // by the argument. If the argument is (fully or partially) on
4041 // the stack, or if the argument is fully in registers but the
4042 // caller has allocated the parameter save anyway, we can refer
4043 // directly to the caller's stack frame. Otherwise, create a
4044 // local copy in our own frame.
4045 int FI;
4046 if (HasParameterArea ||
4047 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4048 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4049 else
4050 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4051 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4052
4053 // Handle aggregates smaller than 8 bytes.
4054 if (ObjSize < PtrByteSize) {
4055 // The value of the object is its address, which differs from the
4056 // address of the enclosing doubleword on big-endian systems.
4057 SDValue Arg = FIN;
4058 if (!isLittleEndian) {
4059 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4060 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4061 }
4062 InVals.push_back(Arg);
4063
4064 if (GPR_idx != Num_GPR_Regs) {
4065 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4066 FuncInfo->addLiveInAttr(VReg, Flags);
4067 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4068 SDValue Store;
4069
4070 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4071 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4072 (ObjSize == 2 ? MVT::i16 : MVT::i32));
4073 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4074 MachinePointerInfo(&*FuncArg), ObjType);
4075 } else {
4076 // For sizes that don't fit a truncating store (3, 5, 6, 7),
4077 // store the whole register as-is to the parameter save area
4078 // slot.
4079 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4080 MachinePointerInfo(&*FuncArg));
4081 }
4082
4083 MemOps.push_back(Store);
4084 }
4085 // Whether we copied from a register or not, advance the offset
4086 // into the parameter save area by a full doubleword.
4087 ArgOffset += PtrByteSize;
4088 continue;
4089 }
4090
4091 // The value of the object is its address, which is the address of
4092 // its first stack doubleword.
4093 InVals.push_back(FIN);
4094
4095 // Store whatever pieces of the object are in registers to memory.
4096 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4097 if (GPR_idx == Num_GPR_Regs)
4098 break;
4099
4100 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4101 FuncInfo->addLiveInAttr(VReg, Flags);
4102 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4103 SDValue Addr = FIN;
4104 if (j) {
4105 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4106 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4107 }
4108 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4109 MachinePointerInfo(&*FuncArg, j));
4110 MemOps.push_back(Store);
4111 ++GPR_idx;
4112 }
4113 ArgOffset += ArgSize;
4114 continue;
4115 }
4116
4117 switch (ObjectVT.getSimpleVT().SimpleTy) {
4118 default: llvm_unreachable("Unhandled argument type!");
4119 case MVT::i1:
4120 case MVT::i32:
4121 case MVT::i64:
4122 if (Flags.isNest()) {
4123 // The 'nest' parameter, if any, is passed in R11.
4124 unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4125 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4126
4127 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4128 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4129
4130 break;
4131 }
4132
4133 // These can be scalar arguments or elements of an integer array type
4134 // passed directly. Clang may use those instead of "byval" aggregate
4135 // types to avoid forcing arguments to memory unnecessarily.
4136 if (GPR_idx != Num_GPR_Regs) {
4137 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4138 FuncInfo->addLiveInAttr(VReg, Flags);
4139 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4140
4141 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4142 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4143 // value to MVT::i64 and then truncate to the correct register size.
4144 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4145 } else {
4146 if (CallConv == CallingConv::Fast)
4147 ComputeArgOffset();
4148
4149 needsLoad = true;
4150 ArgSize = PtrByteSize;
4151 }
4152 if (CallConv != CallingConv::Fast || needsLoad)
4153 ArgOffset += 8;
4154 break;
4155
4156 case MVT::f32:
4157 case MVT::f64:
4158 // These can be scalar arguments or elements of a float array type
4159 // passed directly. The latter are used to implement ELFv2 homogeneous
4160 // float aggregates.
4161 if (FPR_idx != Num_FPR_Regs) {
4162 unsigned VReg;
4163
4164 if (ObjectVT == MVT::f32)
4165 VReg = MF.addLiveIn(FPR[FPR_idx],
4166 Subtarget.hasP8Vector()
4167 ? &PPC::VSSRCRegClass
4168 : &PPC::F4RCRegClass);
4169 else
4170 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4171 ? &PPC::VSFRCRegClass
4172 : &PPC::F8RCRegClass);
4173
4174 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4175 ++FPR_idx;
4176 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4177 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4178 // once we support fp <-> gpr moves.
4179
4180 // This can only ever happen in the presence of f32 array types,
4181 // since otherwise we never run out of FPRs before running out
4182 // of GPRs.
4183 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4184 FuncInfo->addLiveInAttr(VReg, Flags);
4185 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4186
4187 if (ObjectVT == MVT::f32) {
4188 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4189 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4190 DAG.getConstant(32, dl, MVT::i32));
4191 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4192 }
4193
4194 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4195 } else {
4196 if (CallConv == CallingConv::Fast)
4197 ComputeArgOffset();
4198
4199 needsLoad = true;
4200 }
4201
4202 // When passing an array of floats, the array occupies consecutive
4203 // space in the argument area; only round up to the next doubleword
4204 // at the end of the array. Otherwise, each float takes 8 bytes.
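// (Illustrative: three consecutive f32 elements of a homogeneous float
// aggregate advance the offset 4 + 4 + 4 = 12 bytes, and the final
// isInConsecutiveRegsLast rounding below brings the total up to 16.)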
4205 if (CallConv != CallingConv::Fast || needsLoad) {
4206 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4207 ArgOffset += ArgSize;
4208 if (Flags.isInConsecutiveRegsLast())
4209 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4210 }
4211 break;
4212 case MVT::v4f32:
4213 case MVT::v4i32:
4214 case MVT::v8i16:
4215 case MVT::v16i8:
4216 case MVT::v2f64:
4217 case MVT::v2i64:
4218 case MVT::v1i128:
4219 case MVT::f128:
4220 if (!Subtarget.hasQPX()) {
4221 // These can be scalar arguments or elements of a vector array type
4222 // passed directly. The latter are used to implement ELFv2 homogeneous
4223 // vector aggregates.
4224 if (VR_idx != Num_VR_Regs) {
4225 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4226 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4227 ++VR_idx;
4228 } else {
4229 if (CallConv == CallingConv::Fast)
4230 ComputeArgOffset();
4231 needsLoad = true;
4232 }
4233 if (CallConv != CallingConv::Fast || needsLoad)
4234 ArgOffset += 16;
4235 break;
4236 } // not QPX
4237
4238 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
4239 "Invalid QPX parameter type");
4241
4242 case MVT::v4f64:
4243 case MVT::v4i1:
4244 // QPX vectors are treated like their scalar floating-point subregisters
4245 // (except that they're larger).
4246 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
4247 if (QFPR_idx != Num_QFPR_Regs) {
4248 const TargetRegisterClass *RC;
4249 switch (ObjectVT.getSimpleVT().SimpleTy) {
4250 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
4251 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
4252 default: RC = &PPC::QBRCRegClass; break;
4253 }
4254
4255 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
4256 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4257 ++QFPR_idx;
4258 } else {
4259 if (CallConv == CallingConv::Fast)
4260 ComputeArgOffset();
4261 needsLoad = true;
4262 }
4263 if (CallConv != CallingConv::Fast || needsLoad)
4264 ArgOffset += Sz;
4265 break;
4266 }
4267
4268 // We need to load the argument to a virtual register if we determined
4269 // above that we ran out of physical registers of the appropriate type.
4270 if (needsLoad) {
4271 if (ObjSize < ArgSize && !isLittleEndian)
4272 CurArgOffset += ArgSize - ObjSize;
4273 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4274 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4275 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4276 }
4277
4278 InVals.push_back(ArgVal);
4279 }
4280
4281 // Area that is at least reserved in the caller of this function.
4282 unsigned MinReservedArea;
4283 if (HasParameterArea)
4284 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4285 else
4286 MinReservedArea = LinkageSize;
4287
4288 // Set the size that is at least reserved in caller of this function. Tail
4289 // call optimized functions' reserved stack space needs to be aligned so that
4290 // taking the difference between two stack areas will result in an aligned
4291 // stack.
4292 MinReservedArea =
4293 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4294 FuncInfo->setMinReservedArea(MinReservedArea);
4295
4296 // If the function takes variable number of arguments, make a frame index for
4297 // the start of the first vararg value... for expansion of llvm.va_start.
4298 // The ELFv2 ABI spec states:
4299 // C programs that are intended to be *portable* across different compilers
4300 // and architectures must use the header file <stdarg.h> to deal with variable
4301 // argument lists.
4302 if (isVarArg && MFI.hasVAStart()) {
4303 int Depth = ArgOffset;
4304
4305 FuncInfo->setVarArgsFrameIndex(
4306 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4307 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4308
4309 // If this function is vararg, store any remaining integer argument regs
4310 // to their spots on the stack so that they may be loaded by dereferencing
4311 // the result of va_next.
4312 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4313 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4314 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4315 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4316 SDValue Store =
4317 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4318 MemOps.push_back(Store);
4319 // Increment the address by PtrByteSize for the next argument to store.
4320 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4321 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4322 }
4323 }
4324
4325 if (!MemOps.empty())
4326 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4327
4328 return Chain;
4329}
4330
4331SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4332 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4333 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4334 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4335 // TODO: add description of PPC stack frame format, or at least some docs.
4336 //
4337 MachineFunction &MF = DAG.getMachineFunction();
4338 MachineFrameInfo &MFI = MF.getFrameInfo();
4339 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4340
4341 EVT PtrVT = getPointerTy(MF.getDataLayout());
4342 bool isPPC64 = PtrVT == MVT::i64;
4343 // Potential tail calls could cause overwriting of argument stack slots.
4344 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4345 (CallConv == CallingConv::Fast));
4346 unsigned PtrByteSize = isPPC64 ? 8 : 4;
4347 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4348 unsigned ArgOffset = LinkageSize;
4349 // Area that is at least reserved in caller of this function.
4350 unsigned MinReservedArea = ArgOffset;
4351
4352 static const MCPhysReg GPR_32[] = { // 32-bit registers.
4353 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4354 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4355 };
4356 static const MCPhysReg GPR_64[] = { // 64-bit registers.
4357 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4358 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4359 };
4360 static const MCPhysReg VR[] = {
4361 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4362 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4363 };
4364
4365 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4366 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4367 const unsigned Num_VR_Regs = array_lengthof(VR);
4368
4369 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4370
4371 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4372
4373 // In 32-bit non-varargs functions, the stack space for vectors is after the
4374 // stack space for non-vectors. We do not use this space unless we have
4375 // too many vectors to fit in registers, something that only occurs in
4376 // constructed examples, but we have to walk the arglist to figure
4377 // that out... for the pathological case, compute VecArgOffset as the
4378 // start of the vector parameter area. Computing VecArgOffset is the
4379 // entire point of the following loop.
4380 unsigned VecArgOffset = ArgOffset;
4381 if (!isVarArg && !isPPC64) {
4382 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4383 ++ArgNo) {
4384 EVT ObjectVT = Ins[ArgNo].VT;
4385 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4386
4387 if (Flags.isByVal()) {
4388 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
4389 unsigned ObjSize = Flags.getByValSize();
4390 unsigned ArgSize =
4391 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4392 VecArgOffset += ArgSize;
4393 continue;
4394 }
4395
4396 switch(ObjectVT.getSimpleVT().SimpleTy) {
4397 default: llvm_unreachable("Unhandled argument type!");
4398 case MVT::i1:
4399 case MVT::i32:
4400 case MVT::f32:
4401 VecArgOffset += 4;
4402 break;
4403 case MVT::i64: // PPC64
4404 case MVT::f64:
4405 // FIXME: We are guaranteed to be !isPPC64 at this point.
4406 // Does MVT::i64 apply?
4407 VecArgOffset += 8;
4408 break;
4409 case MVT::v4f32:
4410 case MVT::v4i32:
4411 case MVT::v8i16:
4412 case MVT::v16i8:
4413 // Nothing to do, we're only looking at non-vector args here.
4414 break;
4415 }
4416 }
4417 }
4418 // We've found where the vector parameter area in memory is. Skip the
4419 // first 12 parameters; these don't use that memory.
4420 VecArgOffset = ((VecArgOffset+15)/16)*16;
4421 VecArgOffset += 12*16;
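// (The first 12 vector parameters travel in registers V2-V13; only the 13th
// and later vector arguments use the memory area computed here.)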
4422
4423 // Add DAG nodes to load the arguments or copy them out of registers. On
4424 // entry to a function on PPC, the arguments start after the linkage area,
4425 // although the first ones are often in registers.
4426
4427 SmallVector<SDValue, 8> MemOps;
4428 unsigned nAltivecParamsAtEnd = 0;
4429 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4430 unsigned CurArgIdx = 0;
4431 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4432 SDValue ArgVal;
4433 bool needsLoad = false;
4434 EVT ObjectVT = Ins[ArgNo].VT;
4435 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4436 unsigned ArgSize = ObjSize;
4437 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4438 if (Ins[ArgNo].isOrigArg()) {
4439 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4440 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4441 }
4442 unsigned CurArgOffset = ArgOffset;
4443
4444 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4445 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4446 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4447 if (isVarArg || isPPC64) {
4448 MinReservedArea = ((MinReservedArea+15)/16)*16;
4449 MinReservedArea += CalculateStackSlotSize(ObjectVT,
4450 Flags,
4451 PtrByteSize);
4452 } else nAltivecParamsAtEnd++;
4453 } else
4454 // Calculate min reserved area.
4455 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4456 Flags,
4457 PtrByteSize);
4458
4459 // FIXME the codegen can be much improved in some cases.
4460 // We do not have to keep everything in memory.
4461 if (Flags.isByVal()) {
4462 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4463
4464 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
4465 ObjSize = Flags.getByValSize();
4466 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4467 // Objects of size 1 and 2 are right justified, everything else is
4468 // left justified. This means the memory address is adjusted forwards.
4469 if (ObjSize==1 || ObjSize==2) {
4470 CurArgOffset = CurArgOffset + (4 - ObjSize);
4471 }
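// (Illustrative: a 1-byte byval object is placed at slot offset +3, the
// low-order byte of its 4-byte slot.)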
4472 // The value of the object is its address.
4473 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4474 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4475 InVals.push_back(FIN);
4476 if (ObjSize==1 || ObjSize==2) {
4477 if (GPR_idx != Num_GPR_Regs) {
4478 unsigned VReg;
4479 if (isPPC64)
4480 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4481 else
4482 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4483 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4484 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4485 SDValue Store =
4486 DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4487 MachinePointerInfo(&*FuncArg), ObjType);
4488 MemOps.push_back(Store);
4489 ++GPR_idx;
4490 }
4491
4492 ArgOffset += PtrByteSize;
4493
4494 continue;
4495 }
4496 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4497 // Store whatever pieces of the object are in registers
4498 // to memory. ArgOffset will be the address of the beginning
4499 // of the object.
4500 if (GPR_idx != Num_GPR_Regs) {
4501 unsigned VReg;
4502 if (isPPC64)
4503 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4504 else
4505 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4506 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4507 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4508 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4509 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4510 MachinePointerInfo(&*FuncArg, j));
4511 MemOps.push_back(Store);
4512 ++GPR_idx;
4513 ArgOffset += PtrByteSize;
4514 } else {
4515 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4516 break;
4517 }
4518 }
4519 continue;
4520 }
4521
4522 switch (ObjectVT.getSimpleVT().SimpleTy) {
4523 default: llvm_unreachable("Unhandled argument type!");
4524 case MVT::i1:
4525 case MVT::i32:
4526 if (!isPPC64) {
4527 if (GPR_idx != Num_GPR_Regs) {
4528 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4529 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4530
4531 if (ObjectVT == MVT::i1)
4532 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4533
4534 ++GPR_idx;
4535 } else {
4536 needsLoad = true;
4537 ArgSize = PtrByteSize;
4538 }
4539 // All int arguments reserve stack space in the Darwin ABI.
4540 ArgOffset += PtrByteSize;
4541 break;
4542 }
4543 LLVM_FALLTHROUGH;
4544 case MVT::i64: // PPC64
4545 if (GPR_idx != Num_GPR_Regs) {
4546 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4547 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4548
4549 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4550 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4551 // value to MVT::i64 and then truncate to the correct register size.
4552 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4553
4554 ++GPR_idx;
4555 } else {
4556 needsLoad = true;
4557 ArgSize = PtrByteSize;
4558 }
4559 // All int arguments reserve stack space in the Darwin ABI.
4560 ArgOffset += 8;
4561 break;
4562
4563 case MVT::f32:
4564 case MVT::f64:
4565 // Every 4 bytes of argument space consumes one of the GPRs available for
4566 // argument passing.
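// (Illustrative: an f64 argument on 32-bit Darwin shadows two GPRs, since it
// occupies 8 bytes of argument space; an f32 shadows one.)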
4567 if (GPR_idx != Num_GPR_Regs) {
4568 ++GPR_idx;
4569 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4570 ++GPR_idx;
4571 }
4572 if (FPR_idx != Num_FPR_Regs) {
4573 unsigned VReg;
4574
4575 if (ObjectVT == MVT::f32)
4576 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4577 else
4578 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4579
4580 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4581 ++FPR_idx;
4582 } else {
4583 needsLoad = true;
4584 }
4585
4586 // All FP arguments reserve stack space in the Darwin ABI.
4587 ArgOffset += isPPC64 ? 8 : ObjSize;
4588 break;
4589 case MVT::v4f32:
4590 case MVT::v4i32:
4591 case MVT::v8i16:
4592 case MVT::v16i8:
4593 // Note that vector arguments in registers don't reserve stack space,
4594 // except in varargs functions.
4595 if (VR_idx != Num_VR_Regs) {
4596 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4597 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4598 if (isVarArg) {
4599 while ((ArgOffset % 16) != 0) {
4600 ArgOffset += PtrByteSize;
4601 if (GPR_idx != Num_GPR_Regs)
4602 GPR_idx++;
4603 }
4604 ArgOffset += 16;
4605 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4606 }
4607 ++VR_idx;
4608 } else {
4609 if (!isVarArg && !isPPC64) {
4610 // Vectors go after all the nonvectors.
4611 CurArgOffset = VecArgOffset;
4612 VecArgOffset += 16;
4613 } else {
4614 // Vectors are aligned.
4615 ArgOffset = ((ArgOffset+15)/16)*16;
4616 CurArgOffset = ArgOffset;
4617 ArgOffset += 16;
4618 }
4619 needsLoad = true;
4620 }
4621 break;
4622 }
4623
4624 // We need to load the argument to a virtual register if we determined above
4625 // that we ran out of physical registers of the appropriate type.
4626 if (needsLoad) {
4627 int FI = MFI.CreateFixedObject(ObjSize,
4628 CurArgOffset + (ArgSize - ObjSize),
4629 isImmutable);
4630 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4631 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4632 }
4633
4634 InVals.push_back(ArgVal);
4635 }
4636
4637 // Allow for Altivec parameters at the end, if needed.
4638 if (nAltivecParamsAtEnd) {
4639 MinReservedArea = ((MinReservedArea+15)/16)*16;
4640 MinReservedArea += 16*nAltivecParamsAtEnd;
4641 }
4642
4643 // Area that is at least reserved in the caller of this function.
4644 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4645
4646 // Set the size that is at least reserved in caller of this function. Tail
4647 // call optimized functions' reserved stack space needs to be aligned so that
4648 // taking the difference between two stack areas will result in an aligned
4649 // stack.
4650 MinReservedArea =
4651 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4652 FuncInfo->setMinReservedArea(MinReservedArea);
4653
4654 // If the function takes variable number of arguments, make a frame index for
4655 // the start of the first vararg value... for expansion of llvm.va_start.
4656 if (isVarArg) {
4657 int Depth = ArgOffset;
4658
4659 FuncInfo->setVarArgsFrameIndex(
4660 MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4661 Depth, true));
4662 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4663
4664 // If this function is vararg, store any remaining integer argument regs
4665 // to their spots on the stack so that they may be loaded by dereferencing
4666 // the result of va_next.
4667 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4668 unsigned VReg;
4669
4670 if (isPPC64)
4671 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4672 else
4673 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4674
4675 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4676 SDValue Store =
4677 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4678 MemOps.push_back(Store);
4679 // Increment the address by the pointer size for the next argument to store.
4680 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4681 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4682 }
4683 }
4684
4685 if (!MemOps.empty())
4686 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4687
4688 return Chain;
4689}
4690
4691/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4692/// adjusted to accommodate the arguments for the tailcall.
4693static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4694 unsigned ParamSize) {
4695
4696 if (!isTailCall) return 0;
4697
4698 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4699 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4700 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4701 // Remember only if the new adjustment is bigger.
4702 if (SPDiff < FI->getTailCallSPDelta())
4703 FI->setTailCallSPDelta(SPDiff);
4704
4705 return SPDiff;
4706}
4707
4708 static bool isFunctionGlobalAddress(SDValue Callee);
4709
4710static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4711 const TargetMachine &TM) {
4712 // It does not make sense to call callsShareTOCBase() with a caller that
4713 // is PC Relative since PC Relative callers do not have a TOC.
4714#ifndef NDEBUG
4715 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4716 assert(!STICaller->isUsingPCRelativeCalls() &&
4717 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4718#endif
4719
4720 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4721 // don't have enough information to determine if the caller and callee share
4722 // the same TOC base, so we have to pessimistically assume they don't for
4723 // correctness.
4724 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4725 if (!G)
4726 return false;
4727
4728 const GlobalValue *GV = G->getGlobal();
4729
4730 // If the callee is preemptable, then the static linker will use a plt-stub
4731 // which saves the toc to the stack, and needs a nop after the call
4732 // instruction to convert to a toc-restore.
4733 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4734 return false;
4735
4736 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4737 // We may need a TOC restore in the situation where the caller requires a
4738 // valid TOC but the callee is PC Relative and does not.
4739 const Function *F = dyn_cast<Function>(GV);
4740 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4741
4742 // If we have an Alias we can try to get the function from there.
4743 if (Alias) {
4744 const GlobalObject *GlobalObj = Alias->getBaseObject();
4745 F = dyn_cast<Function>(GlobalObj);
4746 }
4747
4748 // If we still have no valid function pointer we do not have enough
4749 // information to determine if the callee uses PC Relative calls so we must
4750 // assume that it does.
4751 if (!F)
4752 return false;
4753
4754 // If the callee uses PC Relative we cannot guarantee that the callee won't
4755 // clobber the TOC of the caller and so we must assume that the two
4756 // functions do not share a TOC base.
4757 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4758 if (STICallee->isUsingPCRelativeCalls())
4759 return false;
4760
4761 // The medium and large code models are expected to provide a sufficiently
4762 // large TOC to provide all data addressing needs of a module with a
4763 // single TOC.
4764 if (CodeModel::Medium == TM.getCodeModel() ||
4765 CodeModel::Large == TM.getCodeModel())
4766 return true;
4767
4768 // Otherwise we need to ensure callee and caller are in the same section,
4769 // since the linker may allocate multiple TOCs, and we don't know which
4770 // sections will belong to the same TOC base.
4771 if (!GV->isStrongDefinitionForLinker())
4772 return false;
4773
4774 // Any explicitly-specified sections and section prefixes must also match.
4775 // Also, if we're using -ffunction-sections, then each function is always in
4776 // a different section (the same is true for COMDAT functions).
4777 if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4778 GV->getSection() != Caller->getSection())
4779 return false;
4780 if (const auto *F = dyn_cast<Function>(GV)) {
4781 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4782 return false;
4783 }
4784
4785 return true;
4786}
4787
4788 static bool
4789 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4790 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4791 assert(Subtarget.is64BitELFABI());
4792
4793 const unsigned PtrByteSize = 8;
4794 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4795
4796 static const MCPhysReg GPR[] = {
4797 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4798 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4799 };
4800 static const MCPhysReg VR[] = {
4801 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4802 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4803 };
4804
4805 const unsigned NumGPRs = array_lengthof(GPR);
4806 const unsigned NumFPRs = 13;
4807 const unsigned NumVRs = array_lengthof(VR);
4808 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4809
4810 unsigned NumBytes = LinkageSize;
4811 unsigned AvailableFPRs = NumFPRs;
4812 unsigned AvailableVRs = NumVRs;
4813
4814 for (const ISD::OutputArg& Param : Outs) {
4815 if (Param.Flags.isNest()) continue;
4816
4817 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
4818 PtrByteSize, LinkageSize, ParamAreaSize,
4819 NumBytes, AvailableFPRs, AvailableVRs,
4820 Subtarget.hasQPX()))
4821 return true;
4822 }
4823 return false;
4824}
4825
4826static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4827 if (CB.arg_size() != CallerFn->arg_size())
4828 return false;
4829
4830 auto CalleeArgIter = CB.arg_begin();
4831 auto CalleeArgEnd = CB.arg_end();
4832 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4833
4834 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4835 const Value* CalleeArg = *CalleeArgIter;
4836 const Value* CallerArg = &(*CallerArgIter);
4837 if (CalleeArg == CallerArg)
4838 continue;
4839
4840 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4841 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4842 // }
4843 // 1st argument of callee is undef and has the same type as caller.
4844 if (CalleeArg->getType() == CallerArg->getType() &&
4845 isa<UndefValue>(CalleeArg))
4846 continue;
4847
4848 return false;
4849 }
4850
4851 return true;
4852}
4853
4854// Returns true if TCO is possible between the callers and callees
4855// calling conventions.
4856 static bool
4857 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4858 CallingConv::ID CalleeCC) {
4859 // Tail calls are possible with fastcc and ccc.
4860 auto isTailCallableCC = [] (CallingConv::ID CC){
4861 return CC == CallingConv::C || CC == CallingConv::Fast;
4862 };
4863 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4864 return false;
4865
4866 // We can safely tail call both fastcc and ccc callees from a c calling
4867 // convention caller. If the caller is fastcc, we may have less stack space
4868 // than a non-fastcc caller with the same signature so disable tail-calls in
4869 // that case.
4870 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4871}
4872
4873bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4874 SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4875 const SmallVectorImpl<ISD::OutputArg> &Outs,
4876 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4877 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4878
4879 if (DisableSCO && !TailCallOpt) return false;
4880
4881 // Variadic argument functions are not supported.
4882 if (isVarArg) return false;
4883
4884 auto &Caller = DAG.getMachineFunction().getFunction();
4885 // Check that the calling conventions are compatible for tco.
4886 if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4887 return false;
4888
4889 // A caller with any byval parameter is not supported.
4890 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4891 return false;
4892
4893 // Likewise, a callee with any byval parameter is not supported.
4894 // Note: This is a quick work around, because in some cases, e.g.
4895 // caller's stack size > callee's stack size, we are still able to apply
4896 // sibling call optimization. For example, gcc is able to do SCO for caller1
4897 // in the following example, but not for caller2.
4898 // struct test {
4899 // long int a;
4900 // char ary[56];
4901 // } gTest;
4902 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4903 // b->a = v.a;
4904 // return 0;
4905 // }
4906 // void caller1(struct test a, struct test c, struct test *b) {
4907 // callee(gTest, b); }
4908 // void caller2(struct test *b) { callee(gTest, b); }
4909 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4910 return false;
4911
4912 // If callee and caller use different calling conventions, we cannot pass
4913 // parameters on stack since offsets for the parameter area may be different.
4914 if (Caller.getCallingConv() != CalleeCC &&
4915 needStackSlotPassParameters(Subtarget, Outs))
4916 return false;
4917
4918 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4919 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4920 // callee potentially have different TOC bases then we cannot tail call since
4921 // we need to restore the TOC pointer after the call.
4922 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4923 // We cannot guarantee this for indirect calls or calls to external functions.
4924 // When PC-Relative addressing is used, the concept of the TOC is no longer
4925 // applicable so this check is not required.
4926 // Check first for indirect calls.
4927 if (!Subtarget.isUsingPCRelativeCalls() &&
4928 !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4929 return false;
4930
4931 // Check if we share the TOC base.
4932 if (!Subtarget.isUsingPCRelativeCalls() &&
4933 !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4934 return false;
4935
4936 // TCO allows altering callee ABI, so we don't have to check further.
4937 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4938 return true;
4939
4940 if (DisableSCO) return false;
4941
4942 // If the callee uses the same argument list as the caller, then we can
4943 // apply SCO in this case. If it does not, then we need to check whether
4944 // the callee needs stack for passing arguments.
4945 // PC Relative tail calls may not have a CallBase.
4946 // If there is no CallBase we cannot verify if we have the same argument
4947 // list so assume that we don't have the same argument list.
4948 if (CB && !hasSameArgumentList(&Caller, *CB) &&
4949 needStackSlotPassParameters(Subtarget, Outs))
4950 return false;
4951 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4952 return false;
4953
4954 return true;
4955}
4956
4957/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4958/// for tail call optimization. Targets which want to do tail call
4959/// optimization should implement this function.
4960bool
4961 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4962 CallingConv::ID CalleeCC,
4963 bool isVarArg,
4964 const SmallVectorImpl<ISD::InputArg> &Ins,
4965 SelectionDAG& DAG) const {
4966 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4967 return false;
4968
4969 // Variable argument functions are not supported.
4970 if (isVarArg)
4971 return false;
4972
4973 MachineFunction &MF = DAG.getMachineFunction();
4974 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4975 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4976 // Functions containing by val parameters are not supported.
4977 for (unsigned i = 0; i != Ins.size(); i++) {
4978 ISD::ArgFlagsTy Flags = Ins[i].Flags;
4979 if (Flags.isByVal()) return false;
4980 }
4981
4982 // Non-PIC/GOT tail calls are supported.
4983 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4984 return true;
4985
4986 // At the moment we can only do local tail calls (in same module, hidden
4987 // or protected) if we are generating PIC.
4988 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4989 return G->getGlobal()->hasHiddenVisibility()
4990 || G->getGlobal()->hasProtectedVisibility();
4991 }
4992
4993 return false;
4994}
4995
4996 /// isBLACompatibleAddress - Return the immediate to use if the specified
4997/// 32-bit value is representable in the immediate field of a BxA instruction.
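/// For example, an absolute target of 0x2000 has its low two bits clear and
/// sign-extends from 26 bits, so it is encoded as the immediate 0x800
/// (the address shifted right by two).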
4998 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4999 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5000 if (!C) return nullptr;
5001
5002 int Addr = C->getZExtValue();
5003 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5004 SignExtend32<26>(Addr) != Addr)
5005 return nullptr; // Top 6 bits have to be sext of immediate.
5006
5007 return DAG
5008 .getConstant(
5009 (int)C->getZExtValue() >> 2, SDLoc(Op),
5010 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5011 .getNode();
5012}
5013
5014namespace {
5015
5016struct TailCallArgumentInfo {
5017 SDValue Arg;
5018 SDValue FrameIdxOp;
5019 int FrameIdx = 0;
5020
5021 TailCallArgumentInfo() = default;
5022};
5023
5024} // end anonymous namespace
5025
5026 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5027 static void StoreTailCallArgumentsToStackSlot(
5028 SelectionDAG &DAG, SDValue Chain,
5029 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5030 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5031 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5032 SDValue Arg = TailCallArgs[i].Arg;
5033 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5034 int FI = TailCallArgs[i].FrameIdx;
5035 // Store relative to framepointer.
5036 MemOpChains.push_back(DAG.getStore(
5037 Chain, dl, Arg, FIN,
5038 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5039 }
5040}
5041
5042/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5043 /// the appropriate stack slot for the tail call optimized function call.
5044 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5045 SDValue OldRetAddr, SDValue OldFP,
5046 int SPDiff, const SDLoc &dl) {
5047 if (SPDiff) {
5048 // Calculate the new stack slot for the return address.
5049 MachineFunction &MF = DAG.getMachineFunction();
5050 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5051 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5052 bool isPPC64 = Subtarget.isPPC64();
5053 int SlotSize = isPPC64 ? 8 : 4;
5054 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5055 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5056 NewRetAddrLoc, true);
5057 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5058 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5059 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5060 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5061 }
5062 return Chain;
5063}
5064
5065/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5066/// the position of the argument.
5067 static void
5068 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5069 SDValue Arg, int SPDiff, unsigned ArgOffset,
5070 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5071 int Offset = ArgOffset + SPDiff;
5072 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5073 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5074 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5075 SDValue FIN = DAG.getFrameIndex(FI, VT);
5076 TailCallArgumentInfo Info;
5077 Info.Arg = Arg;
5078 Info.FrameIdxOp = FIN;
5079 Info.FrameIdx = FI;
5080 TailCallArguments.push_back(Info);
5081}
5082
5083/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5084/// stack slot. Returns the chain as result and the loaded frame pointers in
5085/// LROpOut/FPOpout. Used when tail calling.
5086SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5087 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5088 SDValue &FPOpOut, const SDLoc &dl) const {
5089 if (SPDiff) {
5090 // Load the LR and FP stack slot for later adjusting.
5091 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5092 LROpOut = getReturnAddrFrameIndex(DAG);
5093 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5094 Chain = SDValue(LROpOut.getNode(), 1);
5095 }
5096 return Chain;
5097}
5098
5099/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5100/// by "Src" to address "Dst" of size "Size". Alignment information is
5101/// specified by the specific parameter attribute. The copy will be passed as
5102/// a byval function parameter.
5103/// Sometimes what we are copying is the end of a larger object, the part that
5104 /// does not fit in registers.
5105 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5106 SDValue Chain, ISD::ArgFlagsTy Flags,
5107 SelectionDAG &DAG, const SDLoc &dl) {
5108 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5109 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5110 Flags.getNonZeroByValAlign(), false, false, false,
5111 MachinePointerInfo(), MachinePointerInfo());
5112 }
5113
5114/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5115 /// tail calls.
5116 static void LowerMemOpCallTo(
5117 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5118 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5119 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5120 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5121 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5122 if (!isTailCall) {
5123 if (isVector) {
5124 SDValue StackPtr;
5125 if (isPPC64)
5126 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5127 else
5128 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5129 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5130 DAG.getConstant(ArgOffset, dl, PtrVT));
5131 }
5132 MemOpChains.push_back(
5133 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5134 // Calculate and remember argument location.
5135 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5136 TailCallArguments);
5137 }
5138
5139 static void
5140 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5141 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5142 SDValue FPOp,
5143 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5144 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5145 // might overwrite each other in case of tail call optimization.
5146 SmallVector<SDValue, 8> MemOpChains2;
5147 // Do not flag preceding copytoreg stuff together with the following stuff.
5148 InFlag = SDValue();
5149 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5150 MemOpChains2, dl);
5151 if (!MemOpChains2.empty())
5152 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5153
5154 // Store the return address to the appropriate stack slot.
5155 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5156
5157 // Emit callseq_end just before tailcall node.
5158 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5159 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5160 InFlag = Chain.getValue(1);
5161}
5162
5163// Is this global address that of a function that can be called by name? (as
5164 // opposed to something that must hold a descriptor for an indirect call).
5165 static bool isFunctionGlobalAddress(SDValue Callee) {
5166 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5167 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5168 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5169 return false;
5170
5171 return G->getGlobal()->getValueType()->isFunctionTy();
5172 }
5173
5174 return false;
5175}
5176
5177SDValue PPCTargetLowering::LowerCallResult(
5178 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5179 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5180 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5181 SmallVector<CCValAssign, 16> RVLocs;
5182 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5183 *DAG.getContext());
5184
5185 CCRetInfo.AnalyzeCallResult(
5186 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5187 ? RetCC_PPC_Cold
5188 : RetCC_PPC);
5189
5190 // Copy all of the result registers out of their specified physreg.
5191 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5192 CCValAssign &VA = RVLocs[i];
5193 assert(VA.isRegLoc() && "Can only return in registers!");
5194
5195 SDValue Val;
5196
5197 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5198 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5199 InFlag);
5200 Chain = Lo.getValue(1);
5201 InFlag = Lo.getValue(2);
5202 VA = RVLocs[++i]; // skip ahead to next loc
5203 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5204 InFlag);
5205 Chain = Hi.getValue(1);
5206 InFlag = Hi.getValue(2);
5207 if (!Subtarget.isLittleEndian())
5208 std::swap (Lo, Hi);
5209 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5210 } else {
5211 Val = DAG.getCopyFromReg(Chain, dl,
5212 VA.getLocReg(), VA.getLocVT(), InFlag);
5213 Chain = Val.getValue(1);
5214 InFlag = Val.getValue(2);
5215 }
5216
5217 switch (VA.getLocInfo()) {
5218 default: llvm_unreachable("Unknown loc info!");
5219 case CCValAssign::Full: break;
5220 case CCValAssign::AExt:
5221 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5222 break;
5223 case CCValAssign::ZExt:
5224 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5225 DAG.getValueType(VA.getValVT()));
5226 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5227 break;
5228 case CCValAssign::SExt:
5229 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5230 DAG.getValueType(VA.getValVT()));
5231 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5232 break;
5233 }
5234
5235 InVals.push_back(Val);
5236 }
5237
5238 return Chain;
5239}
5240
5241 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5242 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5243 // PatchPoint calls are not indirect.
5244 if (isPatchPoint)
5245 return false;
5246
5247 if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
5248 return false;
5249
5250 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5251 // because the immediate function pointer points to a descriptor instead of
5252 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5253 // pointer immediate points to the global entry point, while the BLA would
5254 // need to jump to the local entry point (see rL211174).
5255 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5256 isBLACompatibleAddress(Callee, DAG))
5257 return false;
5258
5259 return true;
5260}
5261
5262// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5263static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5264 return Subtarget.isAIXABI() ||
5265 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5266}
5267
5268 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5269 const Function &Caller,
5270 const SDValue &Callee,
5271 const PPCSubtarget &Subtarget,
5272 const TargetMachine &TM) {
5273 if (CFlags.IsTailCall)
5274 return PPCISD::TC_RETURN;
5275
5276 // This is a call through a function pointer.
5277 if (CFlags.IsIndirect) {
5278 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5279 // indirect calls. The save of the caller's TOC pointer to the stack will be
5280 // inserted into the DAG as part of call lowering. The restore of the TOC
5281 // pointer is modeled by using a pseudo instruction for the call opcode that
5282 // represents the 2 instruction sequence of an indirect branch and link,
5283 // immediately followed by a load of the TOC pointer from the stack save
5284 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5285 // as it is not saved or used.
5286 return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5287 : PPCISD::BCTRL;
5288 }
5289
5290 if (Subtarget.isUsingPCRelativeCalls()) {
5291 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5292 return PPCISD::CALL_NOTOC;
5293 }
5294
5295 // The ABIs that maintain a TOC pointer across calls need to have a nop
5296 // immediately following the call instruction if the caller and callee may
5297 // have different TOC bases. At link time, if the linker determines the calls
5298 // may not share a TOC base, the call is redirected to a trampoline inserted
5299 // by the linker. The trampoline will (among other things) save the caller's
5300 // TOC pointer at an ABI-designated offset in the linkage area, and the linker
5301 // will rewrite the nop to be a load of the TOC pointer from the linkage area
5302 // into gpr2.
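// (Illustrative sketch, 64-bit ELFv2 assumed: "bl callee; nop" may be
// redirected by the linker so that the nop becomes "ld r2, 24(r1)", reloading
// the TOC pointer from its linkage-area save slot.)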
5303 if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5304 return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5305 : PPCISD::CALL_NOP;
5306
5307 return PPCISD::CALL;
5308}
5309
5310 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5311 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5312 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5313 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5314 return SDValue(Dest, 0);
5315
5316 // Returns true if the callee is local, and false otherwise.
5317 auto isLocalCallee = [&]() {
5318 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5319 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5320 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5321
5322 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5323 !dyn_cast_or_null<GlobalIFunc>(GV);
5324 };
5325
5326 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5327 // a static relocation model causes some versions of GNU LD (2.17.50, at
5328 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5329 // built with secure-PLT.
5330 bool UsePlt =
5331 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5332 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5333
5334 // On AIX, direct function calls reference the symbol for the function's
5335 // entry point, which is named by prepending a "." before the function's
5336 // C-linkage name.
5337 const auto getAIXFuncEntryPointSymbolSDNode =
5338 [&](StringRef FuncName, bool IsDeclaration,
5339 const XCOFF::StorageClass &SC) {
5340 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5341
5342 MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
5343 Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
5344
5345 if (IsDeclaration && !S->hasRepresentedCsectSet()) {
5346 // On AIX, an undefined symbol needs to be associated with a
5347 // MCSectionXCOFF to get the correct storage mapping class.
5348 // In this case, XCOFF::XMC_PR.
5349 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5350 S->getSymbolTableName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
5351 SectionKind::getMetadata());
5352 S->setRepresentedCsect(Sec);
5353 }
5354
5355 MVT PtrVT =
5356 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5357 return DAG.getMCSymbol(S, PtrVT);
5358 };
5359
5360 if (isFunctionGlobalAddress(Callee)) {
5361 const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
5362 const GlobalValue *GV = G->getGlobal();
5363
5364 if (!Subtarget.isAIXABI())
5365 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5366 UsePlt ? PPCII::MO_PLT : 0);
5367
5368 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5369 const GlobalObject *GO = cast<GlobalObject>(GV);
5370 const XCOFF::StorageClass SC =
5371 TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
5372 return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
5373 SC);
5374 }
5375
5376 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5377 const char *SymName = S->getSymbol();
5378 if (!Subtarget.isAIXABI())
5379 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5380 UsePlt ? PPCII::MO_PLT : 0);
5381
5382 // If there exists a user-declared function whose name is the same as the
5383 // ExternalSymbol's, then we pick up the user-declared version.
5384 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5385 if (const Function *F =
5386 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
5387 const XCOFF::StorageClass SC =
5388 TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
5389 return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
5390 SC);
5391 }
5392
5393 return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
5394 }
5395
5396 // No transformation needed.
5397 assert(Callee.getNode() && "What no callee?");
5398 return Callee;
5399}
5400
5401 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5402 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5403 "Expected a CALLSEQ_STARTSDNode.");
5404
5405 // The last operand is the chain, except when the node has glue. If the node
5406 // has glue, then the last operand is the glue, and the chain is the second
5407 // last operand.
5408 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5409 if (LastValue.getValueType() != MVT::Glue)
5410 return LastValue;
5411
5412 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5413}
5414
5415 // Creates the node that moves a function's address into the count register
5416 // to prepare for an indirect call instruction.
5417 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5418 SDValue &Glue, SDValue &Chain,
5419 const SDLoc &dl) {
5420 SDValue MTCTROps[] = {Chain, Callee, Glue};
5421 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5422 Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5423 makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5424 // The glue is the second value produced.
5425 Glue = Chain.getValue(1);
5426}
5427
5428 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5429 SDValue &Glue, SDValue &Chain,
5430 SDValue CallSeqStart,
5431 const CallBase *CB, const SDLoc &dl,
5432 bool hasNest,
5433 const PPCSubtarget &Subtarget) {
5434 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5435 // entry point, but to the function descriptor (the function entry point
5436 // address is part of the function descriptor though).
5437 // The function descriptor is a three doubleword structure with the
5438 // following fields: function entry point, TOC base address and
5439 // environment pointer.
5440 // Thus for a call through a function pointer, the following actions need
5441 // to be performed:
5442 // 1. Save the TOC of the caller in the TOC save area of its stack
5443 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5444 // 2. Load the address of the function entry point from the function
5445 // descriptor.
5446 // 3. Load the TOC of the callee from the function descriptor into r2.
5447 // 4. Load the environment pointer from the function descriptor into
5448 // r11.
5449 // 5. Branch to the function entry point address.
5450 // 6. On return of the callee, the TOC of the caller needs to be
5451 // restored (this is done in FinishCall()).
5452 //
5453 // The loads are scheduled at the beginning of the call sequence, and the
5454 // register copies are flagged together to ensure that no other
5455 // operations can be scheduled in between. E.g. without flagging the
5456 // copies together, a TOC access in the caller could be scheduled between
5457 // the assignment of the callee TOC and the branch to the callee, which leads
5458 // to incorrect code.
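// (For reference, assuming the 64-bit ELFv1 layout: descriptor word 0 is the
// entry point, word 1 (offset 8) the TOC base, and word 2 (offset 16) the
// environment pointer; the actual offsets below come from the subtarget.)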
5459
5460 // Start by loading the function address from the descriptor.
5461 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5462 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5463 ? (MachineMemOperand::MODereferenceable |
5464 MachineMemOperand::MOInvariant)
5465 : MachineMemOperand::MONone;
5466
5467 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5468
5469 // Registers used in building the DAG.
5470 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5471 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5472
5473 // Offsets of descriptor members.
5474 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5475 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5476
5477 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5478 const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5479
5480 // One load for the functions entry point address.
5481 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5482 Alignment, MMOFlags);
5483
5484 // One for loading the TOC anchor for the module that contains the called
5485 // function.
5486 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5487 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5488 SDValue TOCPtr =
5489 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5490 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5491
5492 // One for loading the environment pointer.
5493 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5494 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5495 SDValue LoadEnvPtr =
5496 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5497 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5498
5499
5500 // Then copy the newly loaded TOC anchor to the TOC pointer.
5501 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5502 Chain = TOCVal.getValue(0);
5503 Glue = TOCVal.getValue(1);
5504
5505 // If the function call has an explicit 'nest' parameter, it takes the
5506 // place of the environment pointer.
5507 assert((!hasNest || !Subtarget.isAIXABI()) &&
5508 "Nest parameter is not supported on AIX.");
5509 if (!hasNest) {
5510 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5511 Chain = EnvVal.getValue(0);
5512 Glue = EnvVal.getValue(1);
5513 }
5514
5515 // The rest of the indirect call sequence is the same as the non-descriptor
5516 // DAG.
5517 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5518}
5519
5520 static void
5521 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5522 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5523 SelectionDAG &DAG,
5524 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5525 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5526 const PPCSubtarget &Subtarget) {
5527 const bool IsPPC64 = Subtarget.isPPC64();
5528 // MVT for a general purpose register.
5529 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5530
5531 // First operand is always the chain.
5532 Ops.push_back(Chain);
5533
5534 // If it's a direct call pass the callee as the second operand.
5535 if (!CFlags.IsIndirect)
5536 Ops.push_back(Callee);
5537 else {
5538 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5539
5540 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5541 // on the stack (this would have been done in `LowerCall_64SVR4` or
5542 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5543 // represents both the indirect branch and a load that restores the TOC
5544 // pointer from the linkage area. The operand for the TOC restore is an add
5545 // of the TOC save offset to the stack pointer. This must be the second
5546 // operand: after the chain input but before any other variadic arguments.
5547 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5548 // saved or used.
5549 if (isTOCSaveRestoreRequired(Subtarget)) {
5550 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5551
5552 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5553 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5554 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5555 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5556 Ops.push_back(AddTOC);
5557 }
5558
5559 // Add the register used for the environment pointer.
5560 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5561 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5562 RegVT));
5563
5564
5565 // Add CTR register as callee so a bctr can be emitted later.
5566 if (CFlags.IsTailCall)
5567 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5568 }
5569
5570 // If this is a tail call add stack pointer delta.
5571 if (CFlags.IsTailCall)
5572 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5573
5574 // Add argument registers to the end of the list so that they are known live
5575 // into the call.
5576 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5577 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5578 RegsToPass[i].second.getValueType()));
5579
5580 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5581 // no way to mark dependencies as implicit here.
5582 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5583 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5584 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5585 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5586
5587 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5588 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5589 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5590
5591 // Add a register mask operand representing the call-preserved registers.
5592 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5593 const uint32_t *Mask =
5594 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5595 assert(Mask && "Missing call preserved mask for calling convention");
5596 Ops.push_back(DAG.getRegisterMask(Mask));
5597
5598 // If the glue is valid, it is the last operand.
5599 if (Glue.getNode())
5600 Ops.push_back(Glue);
5601}
5602
5603SDValue PPCTargetLowering::FinishCall(
5604 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5605 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5606 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5607 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5608 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5609
5610 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5611 Subtarget.isAIXABI())
5612 setUsesTOCBasePtr(DAG);
5613
5614 unsigned CallOpc =
5615 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5616 Subtarget, DAG.getTarget());
5617
5618 if (!CFlags.IsIndirect)
5619 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5620 else if (Subtarget.usesFunctionDescriptors())
5621 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5622 dl, CFlags.HasNest, Subtarget);
5623 else
5624 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5625
5626 // Build the operand list for the call instruction.
5627 SmallVector<SDValue, 8> Ops;
5628 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5629 SPDiff, Subtarget);
5630
5631 // Emit tail call.
5632 if (CFlags.IsTailCall) {
5633 // Indirect tail call when using PC Relative calls do not have the same
5634 // constraints.
5635 assert(((Callee.getOpcode() == ISD::Register &&
5636 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5637 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5638 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5639 isa<ConstantSDNode>(Callee) ||
5640 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5641 "Expecting a global address, external symbol, absolute value, "
5642 "register or an indirect tail call when PC Relative calls are "
5643 "used.");
5644 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5646 "Unexpected call opcode for a tail call.");
5648 return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5649 }
5650
5651 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5652 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5653 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5654 Glue = Chain.getValue(1);
5655
5656 // When performing tail call optimization the callee pops its arguments off
5657 // the stack. Account for this here so these bytes can be pushed back on in
5658 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5659 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5660 getTargetMachine().Options.GuaranteedTailCallOpt)
5661 ? NumBytes
5662 : 0;
5663
5664 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5665 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5666 Glue, dl);
5667 Glue = Chain.getValue(1);
5668
5669 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5670 DAG, InVals);
5671}
5672
5673SDValue
5674PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5675 SmallVectorImpl<SDValue> &InVals) const {
5676 SelectionDAG &DAG = CLI.DAG;
5677 SDLoc &dl = CLI.DL;
5678 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5679 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5680 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5681 SDValue Chain = CLI.Chain;
5682 SDValue Callee = CLI.Callee;
5683 bool &isTailCall = CLI.IsTailCall;
5684 CallingConv::ID CallConv = CLI.CallConv;
5685 bool isVarArg = CLI.IsVarArg;
5686 bool isPatchPoint = CLI.IsPatchPoint;
5687 const CallBase *CB = CLI.CB;
5688
5689 if (isTailCall) {
5690 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5691 isTailCall = false;
5692 else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5693 isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5694 Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5695 else
5696 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5697 Ins, DAG);
5698 if (isTailCall) {
5699 ++NumTailCalls;
5700 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5701 ++NumSiblingCalls;
5702
5703 // PC Relative calls no longer guarantee that the callee is a Global
5704 // Address Node. The callee could be an indirect tail call in which
5705 // case the SDValue for the callee could be a load (to load the address
5706 // of a function pointer) or it may be a register copy (to move the
5707 // address of the callee from a function parameter into a virtual
5708 // register). It may also be an ExternalSymbolSDNode (e.g., memcpy).
5709 assert((Subtarget.isUsingPCRelativeCalls() ||
5710 isa<GlobalAddressSDNode>(Callee)) &&
5711 "Callee should be an llvm::Function object.");
5712
5713 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5714 << "\nTCO callee: ");
5715 LLVM_DEBUG(Callee.dump());
5716 }
5717 }
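// An editorial sketch of a call that typically passes the checks above
// (hypothetical source, not from this file):
//   long callee(long x);
//   long caller(long x) { return callee(x + 1); }
// With matching calling conventions this lowers as a sibling call, so
// NumTailCalls is bumped and, without GuaranteedTailCallOpt,
// NumSiblingCalls as well.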
5718
5719 if (!isTailCall && CB && CB->isMustTailCall())
5720 report_fatal_error("failed to perform tail call elimination on a call "
5721 "site marked musttail");
5722
5723 // When long calls (i.e. indirect calls) are always used, calls are always
5724 // made via function pointer. If we have a function name, first translate it
5725 // into a pointer.
5726 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5727 !isTailCall)
5728 Callee = LowerGlobalAddress(Callee, DAG);
5729
5730 CallFlags CFlags(
5731 CallConv, isTailCall, isVarArg, isPatchPoint,
5732 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5733 // hasNest
5734 Subtarget.is64BitELFABI() &&
5735 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5736 CLI.NoMerge);
5737
5738 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5739 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5740 InVals, CB);
5741
5742 if (Subtarget.isSVR4ABI())
5743 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5744 InVals, CB);
5745
5746 if (Subtarget.isAIXABI())
5747 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5748 InVals, CB);
5749
5750 return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5751 InVals, CB);
5752}
5753
5754SDValue PPCTargetLowering::LowerCall_32SVR4(
5755 SDValue Chain, SDValue Callee, CallFlags CFlags,
5757 const SmallVectorImpl<SDValue> &OutVals,
5758 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5760 const CallBase *CB) const {
5761 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5762 // of the 32-bit SVR4 ABI stack frame layout.
5763
5764 const CallingConv::ID CallConv = CFlags.CallConv;
5765 const bool IsVarArg = CFlags.IsVarArg;
5766 const bool IsTailCall = CFlags.IsTailCall;
5767
5768 assert((CallConv == CallingConv::C ||
5769 CallConv == CallingConv::Cold ||
5770 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5771
5772 const Align PtrAlign(4);
5773
5774 MachineFunction &MF = DAG.getMachineFunction();
5775
5776 // Mark this function as potentially containing a function that contains a
5777 // tail call. As a consequence the frame pointer will be used for dynamic
5778 // allocas and for restoring the caller's stack pointer in this function's
5779 // epilogue. This is done because a tail-called function might overwrite the
5780 // value in this function's (MF) stack pointer stack slot 0(SP).
5781 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5782 CallConv == CallingConv::Fast)
5783 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5784
5785 // Count how many bytes are to be pushed on the stack, including the linkage
5786 // area, parameter list area and the part of the local variable space which
5787 // contains copies of aggregates which are passed by value.
5788
5789 // Assign locations to all of the outgoing arguments.
5790 SmallVector<CCValAssign, 16> ArgLocs;
5791 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5792
5793 // Reserve space for the linkage area on the stack.
5794 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5795 PtrAlign);
5796 if (useSoftFloat())
5797 CCInfo.PreAnalyzeCallOperands(Outs);
5798
5799 if (IsVarArg) {
5800 // Handle fixed and variable vector arguments differently.
5801 // Fixed vector arguments go into registers as long as registers are
5802 // available. Variable vector arguments always go into memory.
5803 unsigned NumArgs = Outs.size();
5804
5805 for (unsigned i = 0; i != NumArgs; ++i) {
5806 MVT ArgVT = Outs[i].VT;
5807 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5808 bool Result;
5809
5810 if (Outs[i].IsFixed) {
5811 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5812 CCInfo);
5813 } else {
5814 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5815 ArgFlags, CCInfo);
5816 }
5817
5818 if (Result) {
5819#ifndef NDEBUG
5820 errs() << "Call operand #" << i << " has unhandled type "
5821 << EVT(ArgVT).getEVTString() << "\n";
5822#endif
5823 llvm_unreachable(nullptr);
5824 }
5825 }
5826 } else {
5827 // All arguments are treated the same.
5828 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5829 }
5830 CCInfo.clearWasPPCF128();
5831
5832 // Assign locations to all of the outgoing aggregate by value arguments.
5833 SmallVector<CCValAssign, 16> ByValArgLocs;
5834 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5835
5836 // Reserve stack space for the allocations in CCInfo.
5837 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5838
5839 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5840
5841 // Size of the linkage area, parameter list area and the part of the local
5842 // variable space where copies of aggregates which are passed by value are
5843 // stored.
5844 unsigned NumBytes = CCByValInfo.getNextStackOffset();
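// Worked example (editorial, assuming the 8-byte 32-bit SVR4 linkage
// area): a call passing ten i32 values puts eight of them in R3-R10 and
// the remaining two in the parameter list area at offsets 8 and 12, so
// NumBytes comes out as 16.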
5845
5846 // Calculate by how many bytes the stack has to be adjusted in case of tail
5847 // call optimization.
5848 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5849
5850 // Adjust the stack pointer for the new arguments...
5851 // These operations are automatically eliminated by the prolog/epilog pass
5852 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5853 SDValue CallSeqStart = Chain;
5854
5855 // Load the return address and frame pointer so they can be moved
5856 // somewhere else later.
5857 SDValue LROp, FPOp;
5858 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5859
5860 // Set up a copy of the stack pointer for use loading and storing any
5861 // arguments that may not fit in the registers available for argument
5862 // passing.
5863 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5864
5865 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5866 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5867 SmallVector<SDValue, 8> MemOpChains;
5868
5869 bool seenFloatArg = false;
5870 // Walk the register/memloc assignments, inserting copies/loads.
5871 // i - Tracks the index into the list of registers allocated for the call
5872 // RealArgIdx - Tracks the index into the list of actual function arguments
5873 // j - Tracks the index into the list of byval arguments
5874 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5875 i != e;
5876 ++i, ++RealArgIdx) {
5877 CCValAssign &VA = ArgLocs[i];
5878 SDValue Arg = OutVals[RealArgIdx];
5879 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5880
5881 if (Flags.isByVal()) {
5882 // Argument is an aggregate which is passed by value, thus we need to
5883 // create a copy of it in the local variable space of the current stack
5884 // frame (which is the stack frame of the caller) and pass the address of
5885 // this copy to the callee.
5886 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5887 CCValAssign &ByValVA = ByValArgLocs[j++];
5888 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5889
5890 // Memory reserved in the local variable space of the caller's stack frame.
5891 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5892
5893 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5894 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5895 StackPtr, PtrOff);
5896
5897 // Create a copy of the argument in the local area of the current
5898 // stack frame.
5899 SDValue MemcpyCall =
5900 CreateCopyOfByValArgument(Arg, PtrOff,
5901 CallSeqStart.getNode()->getOperand(0),
5902 Flags, DAG, dl);
5903
5904 // This must go outside the CALLSEQ_START..END.
5905 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5906 SDLoc(MemcpyCall));
5907 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5908 NewCallSeqStart.getNode());
5909 Chain = CallSeqStart = NewCallSeqStart;
5910
5911 // Pass the address of the aggregate copy on the stack either in a
5912 // physical register or in the parameter list area of the current stack
5913 // frame to the callee.
5914 Arg = PtrOff;
5915 }
5916
5917 // When useCRBits() is true, there can be i1 arguments.
5918 // It is because getRegisterType(MVT::i1) => MVT::i1,
5919 // and for other integer types getRegisterType() => MVT::i32.
5920 // Extend i1 and ensure callee will get i32.
5921 if (Arg.getValueType() == MVT::i1)
5922 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5923 dl, MVT::i32, Arg);
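// For example, an i1 'true' argument marked signext is widened here to the
// i32 value -1, while a zeroext i1 'true' becomes the i32 value 1.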
5924
5925 if (VA.isRegLoc()) {
5926 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5927 // Put argument in a physical register.
5928 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5929 bool IsLE = Subtarget.isLittleEndian();
5930 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5931 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5932 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5933 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5934 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5935 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5936 SVal.getValue(0)));
5937 } else
5938 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5939 } else {
5940 // Put argument in the parameter list area of the current stack frame.
5941 assert(VA.isMemLoc());
5942 unsigned LocMemOffset = VA.getLocMemOffset();
5943
5944 if (!IsTailCall) {
5945 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5946 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5947 StackPtr, PtrOff);
5948
5949 MemOpChains.push_back(
5950 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5951 } else {
5952 // Calculate and remember argument location.
5953 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5954 TailCallArguments);
5955 }
5956 }
5957 }
5958
5959 if (!MemOpChains.empty())
5960 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5961
5962 // Build a sequence of copy-to-reg nodes chained together with token chain
5963 // and flag operands which copy the outgoing args into the appropriate regs.
5964 SDValue InFlag;
5965 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5966 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5967 RegsToPass[i].second, InFlag);
5968 InFlag = Chain.getValue(1);
5969 }
5970
5971 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5972 // registers.
5973 if (IsVarArg) {
5974 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5975 SDValue Ops[] = { Chain, InFlag };
5976
5977 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5978 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5979
5980 InFlag = Chain.getValue(1);
5981 }
5982
5983 if (IsTailCall)
5984 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5985 TailCallArguments);
5986
5987 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5988 Callee, SPDiff, NumBytes, Ins, InVals, CB);
5989}
5990
5991// Copy an argument into memory, being careful to do this outside the
5992// call sequence for the call to which the argument belongs.
5993SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5994 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5995 SelectionDAG &DAG, const SDLoc &dl) const {
5996 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5997 CallSeqStart.getNode()->getOperand(0),
5998 Flags, DAG, dl);
5999 // The MEMCPY must go outside the CALLSEQ_START..END.
6000 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6001 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6002 SDLoc(MemcpyCall));
6003 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6004 NewCallSeqStart.getNode());
6005 return NewCallSeqStart;
6006}
6007
6008SDValue PPCTargetLowering::LowerCall_64SVR4(
6009 SDValue Chain, SDValue Callee, CallFlags CFlags,
6010 const SmallVectorImpl<ISD::OutputArg> &Outs,
6011 const SmallVectorImpl<SDValue> &OutVals,
6012 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6013 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6014 const CallBase *CB) const {
6015 bool isELFv2ABI = Subtarget.isELFv2ABI();
6016 bool isLittleEndian = Subtarget.isLittleEndian();
6017 unsigned NumOps = Outs.size();
6018 bool IsSibCall = false;
6019 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6020
6021 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6022 unsigned PtrByteSize = 8;
6023
6024 MachineFunction &MF = DAG.getMachineFunction();
6025
6026 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6027 IsSibCall = true;
6028
6029 // Mark this function as potentially containing a function that contains a
6030 // tail call. As a consequence the frame pointer will be used for dynamic
6031 // allocas and for restoring the caller's stack pointer in this function's
6032 // epilogue. This is done because a tail-called function might overwrite the
6033 // value in this function's (MF) stack pointer stack slot 0(SP).
6034 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6035 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6036
6037 assert(!(IsFastCall && CFlags.IsVarArg) &&
6038 "fastcc not supported on varargs functions");
6039
6040 // Count how many bytes are to be pushed on the stack, including the linkage
6041 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6042 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6043 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6044 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6045 unsigned NumBytes = LinkageSize;
6046 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6047 unsigned &QFPR_idx = FPR_idx;
6048
6049 static const MCPhysReg GPR[] = {
6050 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6051 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6052 };
6053 static const MCPhysReg VR[] = {
6054 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6055 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6056 };
6057
6058 const unsigned NumGPRs = array_lengthof(GPR);
6059 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6060 const unsigned NumVRs = array_lengthof(VR);
6061 const unsigned NumQFPRs = NumFPRs;
6062
6063 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6064 // can be passed to the callee in registers.
6065 // For the fast calling convention, there is another check below.
6066 // Note: We should keep this consistent with LowerFormalArguments_64SVR4()
6067 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6068 if (!HasParameterArea) {
6069 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6070 unsigned AvailableFPRs = NumFPRs;
6071 unsigned AvailableVRs = NumVRs;
6072 unsigned NumBytesTmp = NumBytes;
6073 for (unsigned i = 0; i != NumOps; ++i) {
6074 if (Outs[i].Flags.isNest()) continue;
6075 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6076 PtrByteSize, LinkageSize, ParamAreaSize,
6077 NumBytesTmp, AvailableFPRs, AvailableVRs,
6078 Subtarget.hasQPX()))
6079 HasParameterArea = true;
6080 }
6081 }
6082
6083 // When using the fast calling convention, we don't provide backing for
6084 // arguments that will be in registers.
6085 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6086
6087 // Avoid allocating parameter area for fastcc functions if all the arguments
6088 // can be passed in the registers.
6089 if (IsFastCall)
6090 HasParameterArea = false;
6091
6092 // Add up all the space actually used.
6093 for (unsigned i = 0; i != NumOps; ++i) {
6094 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6095 EVT ArgVT = Outs[i].VT;
6096 EVT OrigVT = Outs[i].ArgVT;
6097
6098 if (Flags.isNest())
6099 continue;
6100
6101 if (IsFastCall) {
6102 if (Flags.isByVal()) {
6103 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6104 if (NumGPRsUsed > NumGPRs)
6105 HasParameterArea = true;
6106 } else {
6107 switch (ArgVT.getSimpleVT().SimpleTy) {
6108 default: llvm_unreachable("Unexpected ValueType for argument!");
6109 case MVT::i1:
6110 case MVT::i32:
6111 case MVT::i64:
6112 if (++NumGPRsUsed <= NumGPRs)
6113 continue;
6114 break;
6115 case MVT::v4i32:
6116 case MVT::v8i16:
6117 case MVT::v16i8:
6118 case MVT::v2f64:
6119 case MVT::v2i64:
6120 case MVT::v1i128:
6121 case MVT::f128:
6122 if (++NumVRsUsed <= NumVRs)
6123 continue;
6124 break;
6125 case MVT::v4f32:
6126 // When using QPX, this is handled like a FP register, otherwise, it
6127 // is an Altivec register.
6128 if (Subtarget.hasQPX()) {
6129 if (++NumFPRsUsed <= NumFPRs)
6130 continue;
6131 } else {
6132 if (++NumVRsUsed <= NumVRs)
6133 continue;
6134 }
6135 break;
6136 case MVT::f32:
6137 case MVT::f64:
6138 case MVT::v4f64: // QPX
6139 case MVT::v4i1: // QPX
6140 if (++NumFPRsUsed <= NumFPRs)
6141 continue;
6142 break;
6143 }
6144 HasParameterArea = true;
6145 }
6146 }
6147
6148 /* Respect alignment of argument on the stack. */
6149 auto Alignment =
6150 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6151 NumBytes = alignTo(NumBytes, Alignment);
6152
6153 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6154 if (Flags.isInConsecutiveRegsLast())
6155 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6156 }
6157
6158 unsigned NumBytesActuallyUsed = NumBytes;
6159
6160 // In the old ELFv1 ABI,
6161 // the prolog code of the callee may store up to 8 GPR argument registers to
6162 // the stack, allowing va_start to index over them in memory if it is varargs.
6163 // Because we cannot tell if this is needed on the caller side, we have to
6164 // conservatively assume that it is needed. As such, make sure we have at
6165 // least enough stack space for the caller to store the 8 GPRs.
6166 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6167 // really requires memory operands, e.g. a vararg function.
6168 if (HasParameterArea)
6169 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6170 else
6171 NumBytes = LinkageSize;
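// Worked example (editorial): an ELFv2 call f(1, 2, 3) passes everything
// in X3-X5, so NumBytes stays at the 32-byte linkage size; a varargs
// callee forces the parameter area, giving 32 + 8*8 == 96 bytes, while
// ELFv1 always reserves at least 48 + 64 == 112.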
6172
6173 // Tail call needs the stack to be aligned.
6174 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6175 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6176
6177 int SPDiff = 0;
6178
6179 // Calculate by how many bytes the stack has to be adjusted in case of tail
6180 // call optimization.
6181 if (!IsSibCall)
6182 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6183
6184 // To protect arguments on the stack from being clobbered in a tail call,
6185 // force all the loads to happen before doing any other lowering.
6186 if (CFlags.IsTailCall)
6187 Chain = DAG.getStackArgumentTokenFactor(Chain);
6188
6189 // Adjust the stack pointer for the new arguments...
6190 // These operations are automatically eliminated by the prolog/epilog pass
6191 if (!IsSibCall)
6192 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6193 SDValue CallSeqStart = Chain;
6194
6195 // Load the return address and frame pointer so they can be moved
6196 // somewhere else later.
6197 SDValue LROp, FPOp;
6198 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6199
6200 // Set up a copy of the stack pointer for use loading and storing any
6201 // arguments that may not fit in the registers available for argument
6202 // passing.
6203 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6204
6205 // Figure out which arguments are going to go in registers, and which in
6206 // memory. Also, if this is a vararg function, floating point arguments
6207 // must be stored to our stack, and loaded into integer regs as well, if
6208 // any integer regs are available for argument passing.
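// For example (editorial sketch), a varargs call passing a double places
// the value in an FPR, stores the same bits to the parameter area, and,
// when a GPR is still free, copies them into the matching GPR so the
// callee's va_arg can find the value either way.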
6209 unsigned ArgOffset = LinkageSize;
6210
6211 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6212 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6213
6214 SmallVector<SDValue, 8> MemOpChains;
6215 for (unsigned i = 0; i != NumOps; ++i) {
6216 SDValue Arg = OutVals[i];
6217 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6218 EVT ArgVT = Outs[i].VT;
6219 EVT OrigVT = Outs[i].ArgVT;
6220
6221 // PtrOff will be used to store the current argument to the stack if a
6222 // register cannot be found for it.
6223 SDValue PtrOff;
6224
6225 // We re-align the argument offset for each argument, except when using the
6226 // fast calling convention, when we need to make sure we do that only when
6227 // we'll actually use a stack slot.
6228 auto ComputePtrOff = [&]() {
6229 /* Respect alignment of argument on the stack. */
6230 auto Alignment =
6231 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6232 ArgOffset = alignTo(ArgOffset, Alignment);
6233
6234 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6235
6236 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6237 };
6238
6239 if (!IsFastCall) {
6240 ComputePtrOff();
6241
6242 /* Compute GPR index associated with argument offset. */
6243 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6244 GPR_idx = std::min(GPR_idx, NumGPRs);
6245 }
6246
6247 // Promote integers to 64-bit values.
6248 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6249 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6250 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6251 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6252 }
6253
6254 // FIXME memcpy is used way more than necessary. Correctness first.
6255 // Note: "by value" is code for passing a structure by value, not
6256 // basic types.
6257 if (Flags.isByVal()) {
6258 // Note: Size includes alignment padding, so
6259 // struct x { short a; char b; }
6260 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6261 // These are the proper values we need for right-justifying the
6262 // aggregate in a parameter register.
6263 unsigned Size = Flags.getByValSize();
6264
6265 // An empty aggregate parameter takes up no storage and no
6266 // registers.
6267 if (Size == 0)
6268 continue;
6269
6270 if (IsFastCall)
6271 ComputePtrOff();
6272
6273 // All aggregates smaller than 8 bytes must be passed right-justified.
6274 if (Size==1 || Size==2 || Size==4) {
6275 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6276 if (GPR_idx != NumGPRs) {
6277 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6278 MachinePointerInfo(), VT);
6279 MemOpChains.push_back(Load.getValue(1));
6280 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6281
6282 ArgOffset += PtrByteSize;
6283 continue;
6284 }
6285 }
6286
6287 if (GPR_idx == NumGPRs && Size < 8) {
6288 SDValue AddPtr = PtrOff;
6289 if (!isLittleEndian) {
6290 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6291 PtrOff.getValueType());
6292 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6293 }
6294 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6295 CallSeqStart,
6296 Flags, DAG, dl);
6297 ArgOffset += PtrByteSize;
6298 continue;
6299 }
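// For example (editorial): a 3-byte aggregate with no free GPRs on a
// big-endian target is copied to AddPtr == PtrOff + (8 - 3), i.e. into
// the rightmost 3 bytes of its reserved doubleword, which is the
// right-justification rule described above.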
6300 // Copy entire object into memory. There are cases where gcc-generated
6301 // code assumes it is there, even if it could be put entirely into
6302 // registers. (This is not what the doc says.)
6303
6304 // FIXME: The above statement is likely due to a misunderstanding of the
6305 // documents. All arguments must be copied into the parameter area BY
6306 // THE CALLEE in the event that the callee takes the address of any
6307 // formal argument. That has not yet been implemented. However, it is
6308 // reasonable to use the stack area as a staging area for the register
6309 // load.
6310
6311 // Skip this for small aggregates, as we will use the same slot for a
6312 // right-justified copy, below.
6313 if (Size >= 8)
6314 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6315 CallSeqStart,
6316 Flags, DAG, dl);
6317
6318 // When a register is available, pass a small aggregate right-justified.
6319 if (Size < 8 && GPR_idx != NumGPRs) {
6320 // The easiest way to get this right-justified in a register
6321 // is to copy the structure into the rightmost portion of a
6322 // local variable slot, then load the whole slot into the
6323 // register.
6324 // FIXME: The memcpy seems to produce pretty awful code for
6325 // small aggregates, particularly for packed ones.
6326 // FIXME: It would be preferable to use the slot in the
6327 // parameter save area instead of a new local variable.
6328 SDValue AddPtr = PtrOff;
6329 if (!isLittleEndian) {
6330 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6331 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6332 }
6333 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6334 CallSeqStart,
6335 Flags, DAG, dl);
6336
6337 // Load the slot into the register.
6338 SDValue Load =
6339 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6340 MemOpChains.push_back(Load.getValue(1));
6341 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6342
6343 // Done with this argument.
6344 ArgOffset += PtrByteSize;
6345 continue;
6346 }
6347
6348 // For aggregates larger than PtrByteSize, copy the pieces of the
6349 // object that fit into registers from the parameter save area.
6350 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6351 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6352 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6353 if (GPR_idx != NumGPRs) {
6354 SDValue Load =
6355 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6356 MemOpChains.push_back(Load.getValue(1));
6357 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6358 ArgOffset += PtrByteSize;
6359 } else {
6360 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6361 break;
6362 }
6363 }
6364 continue;
6365 }
6366
6367 switch (Arg.getSimpleValueType().SimpleTy) {
6368 default: llvm_unreachable("Unexpected ValueType for argument!");
6369 case MVT::i1:
6370 case MVT::i32:
6371 case MVT::i64:
6372 if (Flags.isNest()) {
6373 // The 'nest' parameter, if any, is passed in R11.
6374 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6375 break;
6376 }
6377
6378 // These can be scalar arguments or elements of an integer array type
6379 // passed directly. Clang may use those instead of "byval" aggregate
6380 // types to avoid forcing arguments to memory unnecessarily.
6381 if (GPR_idx != NumGPRs) {
6382 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6383 } else {
6384 if (IsFastCall)
6385 ComputePtrOff();
6386
6387 assert(HasParameterArea &&
6388 "Parameter area must exist to pass an argument in memory.");
6389 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6390 true, CFlags.IsTailCall, false, MemOpChains,
6391 TailCallArguments, dl);
6392 if (IsFastCall)
6393 ArgOffset += PtrByteSize;
6394 }
6395 if (!IsFastCall)
6396 ArgOffset += PtrByteSize;
6397 break;
6398 case MVT::f32:
6399 case MVT::f64: {
6400 // These can be scalar arguments or elements of a float array type
6401 // passed directly. The latter are used to implement ELFv2 homogeneous
6402 // float aggregates.
6403
6404 // Named arguments go into FPRs first, and once they overflow, the
6405 // remaining arguments go into GPRs and then the parameter save area.
6406 // Unnamed arguments for vararg functions always go to GPRs and
6407 // then the parameter save area. For now, put all arguments to vararg
6408 // routines always in both locations (FPR *and* GPR or stack slot).
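// An editorial sketch of the ELFv2 homogeneous float aggregate case
// (hypothetical source):
//   struct S { float a, b, c; };  // lowered as three consecutive f32s
// a, b and c travel in FPRs; in the parameter area the array occupies
// only 12 bytes, with the pairing of adjacent elements into GPR-sized
// words handled by the code below.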
6409 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6410 bool NeededLoad = false;
6411
6412 // First load the argument into the next available FPR.
6413 if (FPR_idx != NumFPRs)
6414 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6415
6416 // Next, load the argument into GPR or stack slot if needed.
6417 if (!NeedGPROrStack)
6418 ;
6419 else if (GPR_idx != NumGPRs && !IsFastCall) {
6420 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6421 // once we support fp <-> gpr moves.
6422
6423 // In the non-vararg case, this can only ever happen in the
6424 // presence of f32 array types, since otherwise we never run
6425 // out of FPRs before running out of GPRs.
6426 SDValue ArgVal;
6427
6428 // Double values are always passed in a single GPR.
6429 if (Arg.getValueType() != MVT::f32) {
6430 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6431
6432 // Non-array float values are extended and passed in a GPR.
6433 } else if (!Flags.isInConsecutiveRegs()) {
6434 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6435 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6436
6437 // If we have an array of floats, we collect every odd element
6438 // together with its predecessor into one GPR.
6439 } else if (ArgOffset % PtrByteSize != 0) {
6440 SDValue Lo, Hi;
6441 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6442 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6443 if (!isLittleEndian)
6444 std::swap(Lo, Hi);
6445 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6446
6447 // The final element, if even, goes into the first half of a GPR.
6448 } else if (Flags.isInConsecutiveRegsLast()) {
6449 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6450 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6451 if (!isLittleEndian)
6452 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6453 DAG.getConstant(32, dl, MVT::i32));
6454
6455 // Non-final even elements are skipped; they will be handled
6456 // together with the subsequent argument on the next go-around.
6457 } else
6458 ArgVal = SDValue();
6459
6460 if (ArgVal.getNode())
6461 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6462 } else {
6463 if (IsFastCall)
6464 ComputePtrOff();
6465
6466 // Single-precision floating-point values are mapped to the
6467 // second (rightmost) word of the stack doubleword.
6468 if (Arg.getValueType() == MVT::f32 &&
6469 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6470 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6471 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6472 }
6473
6474 assert(HasParameterArea &&
6475 "Parameter area must exist to pass an argument in memory.");
6476 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6477 true, CFlags.IsTailCall, false, MemOpChains,
6478 TailCallArguments, dl);
6479
6480 NeededLoad = true;
6481 }
6482 // When passing an array of floats, the array occupies consecutive
6483 // space in the argument area; only round up to the next doubleword
6484 // at the end of the array. Otherwise, each float takes 8 bytes.
6485 if (!IsFastCall || NeededLoad) {
6486 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6487 Flags.isInConsecutiveRegs()) ? 4 : 8;
6488 if (Flags.isInConsecutiveRegsLast())
6489 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6490 }
6491 break;
6492 }
6493 case MVT::v4f32:
6494 case MVT::v4i32:
6495 case MVT::v8i16:
6496 case MVT::v16i8:
6497 case MVT::v2f64:
6498 case MVT::v2i64:
6499 case MVT::v1i128:
6500 case MVT::f128:
6501 if (!Subtarget.hasQPX()) {
6502 // These can be scalar arguments or elements of a vector array type
6503 // passed directly. The latter are used to implement ELFv2 homogeneous
6504 // vector aggregates.
6505
6506 // For a varargs call, named arguments go into VRs or on the stack as
6507 // usual; unnamed arguments always go to the stack or the corresponding
6508 // GPRs when within range. For now, we always put the value in both
6509 // locations (or even all three).
6510 if (CFlags.IsVarArg) {
6511 assert(HasParameterArea &&
6512 "Parameter area must exist if we have a varargs call.");
6513 // We could elide this store in the case where the object fits
6514 // entirely in R registers. Maybe later.
6515 SDValue Store =
6516 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6517 MemOpChains.push_back(Store);
6518 if (VR_idx != NumVRs) {
6519 SDValue Load =
6520 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6521 MemOpChains.push_back(Load.getValue(1));
6522 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6523 }
6524 ArgOffset += 16;
6525 for (unsigned i=0; i<16; i+=PtrByteSize) {
6526 if (GPR_idx == NumGPRs)
6527 break;
6528 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6529 DAG.getConstant(i, dl, PtrVT));
6530 SDValue Load =
6531 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6532 MemOpChains.push_back(Load.getValue(1));
6533 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6534 }
6535 break;
6536 }
6537
6538 // Non-varargs Altivec params go into VRs or on the stack.
6539 if (VR_idx != NumVRs) {
6540 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6541 } else {
6542 if (IsFastCall)
6543 ComputePtrOff();
6544
6545 assert(HasParameterArea &&
6546 "Parameter area must exist to pass an argument in memory.");
6547 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6548 true, CFlags.IsTailCall, true, MemOpChains,
6549 TailCallArguments, dl);
6550 if (IsFastCall)
6551 ArgOffset += 16;
6552 }
6553
6554 if (!IsFastCall)
6555 ArgOffset += 16;
6556 break;
6557 } // not QPX
6558
6559 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
6560 "Invalid QPX parameter type");
6561 LLVM_FALLTHROUGH;
6561
6563 case MVT::v4f64:
6564 case MVT::v4i1: {
6565 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
6566 if (CFlags.IsVarArg) {
6568 "Parameter area must exist if we have a varargs call.");
6569 // We could elide this store in the case where the object fits
6570 // entirely in R registers. Maybe later.
6571 SDValue Store =
6572 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6573 MemOpChains.push_back(Store);
6574 if (QFPR_idx != NumQFPRs) {
6575 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
6577 MemOpChains.push_back(Load.getValue(1));
6578 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
6579 }
6580 ArgOffset += (IsF32 ? 16 : 32);
6581 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
6582 if (GPR_idx == NumGPRs)
6583 break;
6584 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6585 DAG.getConstant(i, dl, PtrVT));
6586 SDValue Load =
6587 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6588 MemOpChains.push_back(Load.getValue(1));
6589 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6590 }
6591 break;
6592 }
6593
6594 // Non-varargs QPX params go into registers or on the stack.
6595 if (QFPR_idx != NumQFPRs) {
6596 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
6597 } else {
6598 if (IsFastCall)
6599 ComputePtrOff();
6600
6602 "Parameter area must exist to pass an argument in memory.");
6603 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6604 true, CFlags.IsTailCall, true, MemOpChains,
6605 TailCallArguments, dl);
6606 if (IsFastCall)
6607 ArgOffset += (IsF32 ? 16 : 32);
6608 }
6609
6610 if (!IsFastCall)
6611 ArgOffset += (IsF32 ? 16 : 32);
6612 break;
6613 }
6614 }
6615 }
6616
6617 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6618 "mismatch in size of parameter area");
6619 (void)NumBytesActuallyUsed;
6620
6621 if (!MemOpChains.empty())
6622 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6623
6624 // Check if this is an indirect call (MTCTR/BCTRL).
6625 // See prepareDescriptorIndirectCall and buildCallOperands for more
6626 // information about calls through function pointers in the 64-bit SVR4 ABI.
6627 if (CFlags.IsIndirect) {
6628 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6629 // caller in the TOC save area.
6630 if (isTOCSaveRestoreRequired(Subtarget)) {
6631 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6632 // Load r2 into a virtual register and store it to the TOC save area.
6633 setUsesTOCBasePtr(DAG);
6634 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6635 // TOC save area offset.
6636 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6637 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6638 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6639 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6640 MachinePointerInfo::getStack(
6641 DAG.getMachineFunction(), TOCSaveOffset));
6642 }
6643 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6644 // This does not mean the MTCTR instruction must use R12; it's easier
6645 // to model this as an extra parameter, so do that.
6646 if (isELFv2ABI && !CFlags.IsPatchPoint)
6647 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6648 }
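// The net effect for a typical non-tail indirect call is a sequence along
// these lines (editorial sketch, assuming the ELFv2 TOC save slot at
// 24(r1)):
//   std r2, 24(r1)   # save the caller's TOC pointer
//   mtctr r12        # callee address is also staged in r12
//   bctrl
//   ld r2, 24(r1)    # restore the TOC pointer on return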
6649
6650 // Build a sequence of copy-to-reg nodes chained together with token chain
6651 // and flag operands which copy the outgoing args into the appropriate regs.
6652 SDValue InFlag;
6653 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6654 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6655 RegsToPass[i].second, InFlag);
6656 InFlag = Chain.getValue(1);
6657 }
6658
6659 if (CFlags.IsTailCall && !IsSibCall)
6660 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6661 TailCallArguments);
6662
6663 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6664 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6665}
6666
6667SDValue PPCTargetLowering::LowerCall_Darwin(
6668 SDValue Chain, SDValue Callee, CallFlags CFlags,
6669 const SmallVectorImpl<ISD::OutputArg> &Outs,
6670 const SmallVectorImpl<SDValue> &OutVals,
6671 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6672 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6673 const CallBase *CB) const {
6674 unsigned NumOps = Outs.size();
6675
6676 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6677 bool isPPC64 = PtrVT == MVT::i64;
6678 unsigned PtrByteSize = isPPC64 ? 8 : 4;
6679
6680 MachineFunction &MF = DAG.getMachineFunction();
6681
6682 // Mark this function as potentially containing a function that contains a
6683 // tail call. As a consequence the frame pointer will be used for dynamic
6684 // allocas and for restoring the caller's stack pointer in this function's
6685 // epilogue. This is done because a tail-called function might overwrite the
6686 // value in this function's (MF) stack pointer stack slot 0(SP).
6687 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6688 CFlags.CallConv == CallingConv::Fast)
6689 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6690
6691 // Count how many bytes are to be pushed on the stack, including the linkage
6692 // area, and parameter passing area. We start with 24/48 bytes, which is
6693 // prereserved space for [SP][CR][LR][3 x unused].
6694 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6695 unsigned NumBytes = LinkageSize;
6696
6697 // Add up all the space actually used.
6698 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6699 // they all go in registers, but we must reserve stack space for them for
6700 // possible use by the caller. In varargs or 64-bit calls, parameters are
6701 // assigned stack space in order, with padding so Altivec parameters are
6702 // 16-byte aligned.
6703 unsigned nAltivecParamsAtEnd = 0;
6704 for (unsigned i = 0; i != NumOps; ++i) {
6705 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6706 EVT ArgVT = Outs[i].VT;
6707 // Varargs Altivec parameters are padded to a 16 byte boundary.
6708 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6709 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6710 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6711 if (!CFlags.IsVarArg && !isPPC64) {
6712 // Non-varargs Altivec parameters go after all the non-Altivec
6713 // parameters; handle those later so we know how much padding we need.
6714 nAltivecParamsAtEnd++;
6715 continue;
6716 }
6717 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6718 NumBytes = ((NumBytes+15)/16)*16;
6719 }
6720 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6721 }
6722
6723 // Allow for Altivec parameters at the end, if needed.
6724 if (nAltivecParamsAtEnd) {
6725 NumBytes = ((NumBytes+15)/16)*16;
6726 NumBytes += 16*nAltivecParamsAtEnd;
6727 }
6728
6729 // The prolog code of the callee may store up to 8 GPR argument registers to
6730 // the stack, allowing va_start to index over them in memory if it is varargs.
6731 // Because we cannot tell if this is needed on the caller side, we have to
6732 // conservatively assume that it is needed. As such, make sure we have at
6733 // least enough stack space for the caller to store the 8 GPRs.
6734 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6735
6736 // Tail call needs the stack to be aligned.
6737 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6738 CFlags.CallConv == CallingConv::Fast)
6739 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6740
6741 // Calculate by how many bytes the stack has to be adjusted in case of tail
6742 // call optimization.
6743 int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6744
6745 // To protect arguments on the stack from being clobbered in a tail call,
6746 // force all the loads to happen before doing any other lowering.
6747 if (CFlags.IsTailCall)
6748 Chain = DAG.getStackArgumentTokenFactor(Chain);
6749
6750 // Adjust the stack pointer for the new arguments...
6751 // These operations are automatically eliminated by the prolog/epilog pass
6752 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6753 SDValue CallSeqStart = Chain;
6754
6755 // Load the return address and frame pointer so they can be moved
6756 // somewhere else later.
6757 SDValue LROp, FPOp;
6758 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6759
6760 // Set up a copy of the stack pointer for use loading and storing any
6761 // arguments that may not fit in the registers available for argument
6762 // passing.
6763 SDValue StackPtr;
6764 if (isPPC64)
6765 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6766 else
6767 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6768
6769 // Figure out which arguments are going to go in registers, and which in
6770 // memory. Also, if this is a vararg function, floating point arguments
6771 // must be stored to our stack, and loaded into integer regs as well, if
6772 // any integer regs are available for argument passing.
6773 unsigned ArgOffset = LinkageSize;
6774 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6775
6776 static const MCPhysReg GPR_32[] = { // 32-bit registers.
6777 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6778 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6779 };
6780 static const MCPhysReg GPR_64[] = { // 64-bit registers.
6781 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6782 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6783 };
6784 static const MCPhysReg VR[] = {
6785 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6786 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6787 };
6788 const unsigned NumGPRs = array_lengthof(GPR_32);
6789 const unsigned NumFPRs = 13;
6790 const unsigned NumVRs = array_lengthof(VR);
6791
6792 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6793
6794 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6795 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6796
6797 SmallVector<SDValue, 8> MemOpChains;
6798 for (unsigned i = 0; i != NumOps; ++i) {
6799 SDValue Arg = OutVals[i];
6800 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6801
6802 // PtrOff will be used to store the current argument to the stack if a
6803 // register cannot be found for it.
6804 SDValue PtrOff;
6805
6806 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6807
6808 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6809
6810 // On PPC64, promote integers to 64-bit values.
6811 if (isPPC64 && Arg.getValueType() == MVT::i32) {
6812 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6813 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6814 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6815 }
6816
6817 // FIXME memcpy is used way more than necessary. Correctness first.
6818 // Note: "by value" is code for passing a structure by value, not
6819 // basic types.
6820 if (Flags.isByVal()) {
6821 unsigned Size = Flags.getByValSize();
6822 // Very small objects are passed right-justified. Everything else is
6823 // passed left-justified.
6824 if (Size==1 || Size==2) {
6825 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6826 if (GPR_idx != NumGPRs) {
6827 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6828 MachinePointerInfo(), VT);
6829 MemOpChains.push_back(Load.getValue(1));
6830 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6831
6832 ArgOffset += PtrByteSize;
6833 } else {
6834 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6835 PtrOff.getValueType());
6836 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6837 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6838 CallSeqStart,
6839 Flags, DAG, dl);
6840 ArgOffset += PtrByteSize;
6841 }
6842 continue;
6843 }
6844 // Copy entire object into memory. There are cases where gcc-generated
6845 // code assumes it is there, even if it could be put entirely into
6846 // registers. (This is not what the doc says.)
6847 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6848 CallSeqStart,
6849 Flags, DAG, dl);
6850
6851 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6852 // copy the pieces of the object that fit into registers from the
6853 // parameter save area.
6854 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6855 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6856 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6857 if (GPR_idx != NumGPRs) {
6858 SDValue Load =
6859 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6860 MemOpChains.push_back(Load.getValue(1));
6861 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6862 ArgOffset += PtrByteSize;
6863 } else {
6864 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6865 break;
6866 }
6867 }
6868 continue;
6869 }
6870
6871 switch (Arg.getSimpleValueType().SimpleTy) {
6872 default: llvm_unreachable("Unexpected ValueType for argument!");
6873 case MVT::i1:
6874 case MVT::i32:
6875 case MVT::i64:
6876 if (GPR_idx != NumGPRs) {
6877 if (Arg.getValueType() == MVT::i1)
6878 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6879
6880 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6881 } else {
6882 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6883 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6884 TailCallArguments, dl);
6885 }
6886 ArgOffset += PtrByteSize;
6887 break;
6888 case MVT::f32:
6889 case MVT::f64:
6890 if (FPR_idx != NumFPRs) {
6891 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6892
6893 if (CFlags.IsVarArg) {
6894 SDValue Store =
6895 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6896 MemOpChains.push_back(Store);
6897
6898 // Float varargs are always shadowed in available integer registers
6899 if (GPR_idx != NumGPRs) {
6900 SDValue Load =
6901 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6902 MemOpChains.push_back(Load.getValue(1));
6903 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6904 }
6905 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6906 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6907 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6908 SDValue Load =
6909 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6910 MemOpChains.push_back(Load.getValue(1));
6911 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6912 }
6913 } else {
6914 // If we have any FPRs remaining, we may also have GPRs remaining.
6915 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6916 // GPRs.
6917 if (GPR_idx != NumGPRs)
6918 ++GPR_idx;
6919 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6920 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
6921 ++GPR_idx;
6922 }
6923 } else
6924 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6925 isPPC64, CFlags.IsTailCall, false, MemOpChains,
6926 TailCallArguments, dl);
6927 if (isPPC64)
6928 ArgOffset += 8;
6929 else
6930 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6931 break;
6932 case MVT::v4f32:
6933 case MVT::v4i32:
6934 case MVT::v8i16:
6935 case MVT::v16i8:
6936 if (CFlags.IsVarArg) {
6937 // These go aligned on the stack, or in the corresponding R registers
6938 // when within range. The Darwin PPC ABI doc claims they also go in
6939 // V registers; in fact gcc does this only for arguments that are
6940 // prototyped, not for those that match the ... We do it for all
6941 // arguments, seems to work.
6942 while (ArgOffset % 16 !=0) {
6943 ArgOffset += PtrByteSize;
6944 if (GPR_idx != NumGPRs)
6945 GPR_idx++;
6946 }
6947 // We could elide this store in the case where the object fits
6948 // entirely in R registers. Maybe later.
6949 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6950 DAG.getConstant(ArgOffset, dl, PtrVT));
6951 SDValue Store =
6952 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6953 MemOpChains.push_back(Store);
6954 if (VR_idx != NumVRs) {
6955 SDValue Load =
6956 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6957 MemOpChains.push_back(Load.getValue(1));
6958 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6959 }
6960 ArgOffset += 16;
6961 for (unsigned i=0; i<16; i+=PtrByteSize) {
6962 if (GPR_idx == NumGPRs)
6963 break;
6964 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6965 DAG.getConstant(i, dl, PtrVT));
6966 SDValue Load =
6967 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6968 MemOpChains.push_back(Load.getValue(1));
6969 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6970 }
6971 break;
6972 }
6973
6974 // Non-varargs Altivec params generally go in registers, but have
6975 // stack space allocated at the end.
6976 if (VR_idx != NumVRs) {
6977 // Doesn't have GPR space allocated.
6978 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6979 } else if (nAltivecParamsAtEnd==0) {
6980 // We are emitting Altivec params in order.
6981 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6982 isPPC64, CFlags.IsTailCall, true, MemOpChains,
6983 TailCallArguments, dl);
6984 ArgOffset += 16;
6985 }
6986 break;
6987 }
6988 }
6989 // If all Altivec parameters fit in registers, as they usually do,
6990 // they get stack space following the non-Altivec parameters. We
6991 // don't track this here because nobody below needs it.
6992 // If there are more Altivec parameters than fit in registers emit
6993 // the stores here.
6994 if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6995 unsigned j = 0;
6996 // Offset is aligned; skip 1st 12 params which go in V registers.
6997 ArgOffset = ((ArgOffset+15)/16)*16;
6998 ArgOffset += 12*16;
6999 for (unsigned i = 0; i != NumOps; ++i) {
7000 SDValue Arg = OutVals[i];
7001 EVT ArgType = Outs[i].VT;
7002 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
7003 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
7004 if (++j > NumVRs) {
7005 SDValue PtrOff;
7006 // We are emitting Altivec params in order.
7007 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
7008 isPPC64, CFlags.IsTailCall, true, MemOpChains,
7009 TailCallArguments, dl);
7010 ArgOffset += 16;
7011 }
7012 }
7013 }
7014 }
7015
7016 if (!MemOpChains.empty())
7017 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7018
7019 // On Darwin, R12 must contain the address of an indirect callee. This does
7020 // not mean the MTCTR instruction must use R12; it's easier to model this as
7021 // an extra parameter, so do that.
7022 if (CFlags.IsIndirect) {
7023 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7024 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
7025 PPC::R12), Callee));
7026 }
7027
7028 // Build a sequence of copy-to-reg nodes chained together with token chain
7029 // and flag operands which copy the outgoing args into the appropriate regs.
7030 SDValue InFlag;
7031 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
7032 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
7033 RegsToPass[i].second, InFlag);
7034 InFlag = Chain.getValue(1);
7035 }
7036
7037 if (CFlags.IsTailCall)
7038 PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
7039 TailCallArguments);
7040
7041 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7042 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7043}
7044
7045static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
7046 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7047 CCState &State) {
7048
7049 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
7050 State.getMachineFunction().getSubtarget());
7051 const bool IsPPC64 = Subtarget.isPPC64();
7052 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
7053 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
7054
7055 assert((!ValVT.isInteger() ||
7056 (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
7057 "Integer argument exceeds register size: should have been legalized");
7058
7059 if (ValVT == MVT::f128)
7060 report_fatal_error("f128 is unimplemented on AIX.");
7061
7062 if (ArgFlags.isNest())
7063 report_fatal_error("Nest arguments are unimplemented.");
7064
7065 if (ValVT.isVector() || LocVT.isVector())
7066 report_fatal_error("Vector arguments are unimplemented on AIX.");
7067
7068 static const MCPhysReg GPR_32[] = {// 32-bit registers.
7069 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7070 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7071 static const MCPhysReg GPR_64[] = {// 64-bit registers.
7072 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7073 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7074
7075 if (ArgFlags.isByVal()) {
7076 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
7077 report_fatal_error("Pass-by-value arguments with alignment greater than "
7078 "register width are not supported.");
7079
7080 const unsigned ByValSize = ArgFlags.getByValSize();
7081
7082 // An empty aggregate parameter takes up no storage and no registers,
7083 // but needs a MemLoc for a stack slot for the formal arguments side.
7084 if (ByValSize == 0) {
7085 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7086 State.getNextStackOffset(), RegVT,
7087 LocInfo));
7088 return false;
7089 }
7090
7091 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
7092 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
7093 for (const unsigned E = Offset + StackSize; Offset < E;
7094 Offset += PtrAlign.value()) {
7095 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7096 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7097 else {
7098 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7099 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
7100 LocInfo));
7101 break;
7102 }
7103 }
7104 return false;
7105 }
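// Worked example (editorial): a 12-byte by-value aggregate on 64-bit AIX
// rounds up to a 16-byte stack allocation; the loop above then binds it
// to two GPRs when both are free, or to whatever prefix of GPRs remains
// followed by a single MemLoc covering the rest.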
7106
7107 // Arguments always reserve parameter save area.
7108 switch (ValVT.SimpleTy) {
7109 default:
7110 report_fatal_error("Unhandled value type for argument.");
7111 case MVT::i64:
7112 // i64 arguments should have been split to i32 for PPC32.
7113 assert(IsPPC64 && "PPC32 should have split i64 values.");
7114 LLVM_FALLTHROUGH;
7115 case MVT::i1:
7116 case MVT::i32: {
7117 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
7118 // AIX integer arguments are always passed in register width.
7119 if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
7120 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
7121 : CCValAssign::LocInfo::ZExt;
7122 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7123 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7124 else
7125 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
7126
7127 return false;
7128 }
7129 case MVT::f32:
7130 case MVT::f64: {
7131 // Parameter save area (PSA) is reserved even if the float passes in fpr.
7132 const unsigned StoreSize = LocVT.getStoreSize();
7133 // Floats are always 4-byte aligned in the PSA on AIX.
7134 // This includes f64 in 64-bit mode for ABI compatibility.
7135 const unsigned Offset =
7136 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7137 unsigned FReg = State.AllocateReg(FPR);
7138 if (FReg)
7139 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7140
7141 // Reserve and initialize GPRs or initialize the PSA as required.
7142 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7143 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7144 assert(FReg && "An FPR should be available when a GPR is reserved.");
7145 if (State.isVarArg()) {
7146 // Successfully reserved GPRs are only initialized for vararg calls.
7147 // Custom handling is required for:
7148 // f64 in PPC32 needs to be split into 2 GPRs.
7149 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7150 State.addLoc(
7151 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7152 }
7153 } else {
7154 // If there are insufficient GPRs, the PSA needs to be initialized.
7155 // Initialization occurs even if an FPR was initialized for
7156 // compatibility with the AIX XL compiler. The full memory for the
7157 // argument will be initialized even if a prior word is saved in GPR.
7158 // A custom memLoc is used when the argument also passes in FPR so
7159 // that the callee handling can skip over it easily.
7160 State.addLoc(
7161 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7162 LocInfo)
7163 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7164 break;
7165 }
7166 }
7167
7168 return false;
7169 }
7170 }
7171 return true;
7172}
7173
7174 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7175 bool IsPPC64) {
7176 assert((IsPPC64 || SVT != MVT::i64) &&
7177 "i64 should have been split for 32-bit codegen.");
7178
7179 switch (SVT) {
7180 default:
7181 report_fatal_error("Unexpected value type for formal argument");
7182 case MVT::i1:
7183 case MVT::i32:
7184 case MVT::i64:
7185 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7186 case MVT::f32:
7187 return &PPC::F4RCRegClass;
7188 case MVT::f64:
7189 return &PPC::F8RCRegClass;
7190 }
7191}
7192
7193 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7194 SelectionDAG &DAG, SDValue ArgValue,
7195 MVT LocVT, const SDLoc &dl) {
7196 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7197 assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
7198
7199 if (Flags.isSExt())
7200 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7201 DAG.getValueType(ValVT));
7202 else if (Flags.isZExt())
7203 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7204 DAG.getValueType(ValVT));
7205
7206 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7207}
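// For example (editorial): an i8 argument arriving sign-extended in a
// 64-bit GPR is wrapped as AssertSext(i64, <i8>) and then truncated back
// to i8, letting later combines rely on the known-extended upper bits.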
7208
7209static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7210 const unsigned LASize = FL->getLinkageSize();
7211
7212 if (PPC::GPRCRegClass.contains(Reg)) {
7213 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7214 "Reg must be a valid argument register!");
7215 return LASize + 4 * (Reg - PPC::R3);
7216 }
7217
7218 if (PPC::G8RCRegClass.contains(Reg)) {
7219 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7220 "Reg must be a valid argument register!");
7221 return LASize + 8 * (Reg - PPC::X3);
7222 }
7223
7224 llvm_unreachable("Only general purpose registers expected.");
7225}
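// Worked example (editorial): with the 48-byte 64-bit AIX linkage area,
// X5 (the third argument register) maps to offset 48 + 8*2 == 64; on
// 32-bit AIX with its 24-byte linkage area, R5 maps to 24 + 4*2 == 32.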
7226
7227// AIX ABI Stack Frame Layout:
7228//
7229// Low Memory +--------------------------------------------+
7230// SP +---> | Back chain | ---+
7231// | +--------------------------------------------+ |
7232// | | Saved Condition Register | |
7233// | +--------------------------------------------+ |
7234// | | Saved Linkage Register | |
7235// | +--------------------------------------------+ | Linkage Area
7236// | | Reserved for compilers | |
7237// | +--------------------------------------------+ |
7238// | | Reserved for binders | |
7239// | +--------------------------------------------+ |
7240// | | Saved TOC pointer | ---+
7241// | +--------------------------------------------+
7242// | | Parameter save area |
7243// | +--------------------------------------------+
7244// | | Alloca space |
7245// | +--------------------------------------------+
7246// | | Local variable space |
7247// | +--------------------------------------------+
7248// | | Float/int conversion temporary |
7249// | +--------------------------------------------+
7250// | | Save area for AltiVec registers |
7251// | +--------------------------------------------+
7252// | | AltiVec alignment padding |
7253// | +--------------------------------------------+
7254// | | Save area for VRSAVE register |
7255// | +--------------------------------------------+
7256// | | Save area for General Purpose registers |
7257// | +--------------------------------------------+
7258// | | Save area for Floating Point registers |
7259// | +--------------------------------------------+
7260// +---- | Back chain |
7261// High Memory +--------------------------------------------+
7262//
7263// Specifications:
7264// AIX 7.2 Assembler Language Reference
7265// Subroutine linkage convention
7266
7267SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7268 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7269 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7270 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7271
7272 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7273 CallConv == CallingConv::Fast) &&
7274 "Unexpected calling convention!");
7275
7276 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7277 report_fatal_error("Tail call support is unimplemented on AIX.");
7278
7279 if (useSoftFloat())
7280 report_fatal_error("Soft float support is unimplemented on AIX.");
7281
7282 const PPCSubtarget &Subtarget =
7283 static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7284 if (Subtarget.hasQPX())
7285 report_fatal_error("QPX is not supported on AIX.");
7286
7287 const bool IsPPC64 = Subtarget.isPPC64();
7288 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7289
7290 // Assign locations to all of the incoming arguments.
7291 SmallVector<CCValAssign, 16> ArgLocs;
7292 MachineFunction &MF = DAG.getMachineFunction();
7293 MachineFrameInfo &MFI = MF.getFrameInfo();
7294 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7295
7296 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7297 // Reserve space for the linkage area on the stack.
7298 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7299 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7300 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7301
7302 SmallVector<SDValue, 8> MemOps;
7303
7304 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7305 CCValAssign &VA = ArgLocs[I++];
7306 MVT LocVT = VA.getLocVT();
7307 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7308
7309 // For compatibility with the AIX XL compiler, the float args in the
7310 // parameter save area are initialized even if the argument is available
7311 // in a register. The caller is required to initialize both the register
7312 // and memory; the callee, however, can choose to expect the value in
7313 // either. The memloc is dismissed here because the argument is retrieved
7314 // from the register.
7315 if (VA.isMemLoc() && VA.needsCustom())
7316 continue;
7317
7318 if (Flags.isByVal() && VA.isMemLoc()) {
7319 const unsigned Size =
7320 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7321 PtrByteSize);
7322 const int FI = MF.getFrameInfo().CreateFixedObject(
7323 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7324 /* IsAliased */ true);
7325 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7326 InVals.push_back(FIN);
7327
7328 continue;
7329 }
7330
7331 if (Flags.isByVal()) {
7332 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7333
7334 const MCPhysReg ArgReg = VA.getLocReg();
7335 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7336
7337 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7338 report_fatal_error("Over aligned byvals not supported yet.");
7339
7340 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7341 const int FI = MF.getFrameInfo().CreateFixedObject(
7342 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7343 /* IsAliased */ true);
7344 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7345 InVals.push_back(FIN);
7346
7347 // Add live ins for all the RegLocs for the same ByVal.
7348 const TargetRegisterClass *RegClass =
7349 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7350
7351 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7352 unsigned Offset) {
7353 const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7354 // Since the caller's side has left-justified the aggregate in the
7355 // register, we can simply store the entire register into the stack
7356 // slot.
7357 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7358 // The store to the fixed-stack object is needed because accessing a
7359 // field of the ByVal will use a GEP and load. Ideally we would optimize
7360 // to extract the value from the register directly and elide the
7361 // stores when the argument's address is not taken, but that will
7362 // need to be future work.
7363 SDValue Store =
7364 DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
7365 DAG.getObjectPtrOffset(dl, FIN, Offset),
7366 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7367
7368 MemOps.push_back(Store);
7369 };
7370
7371 unsigned Offset = 0;
7372 HandleRegLoc(VA.getLocReg(), Offset);
7373 Offset += PtrByteSize;
7374 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7375 Offset += PtrByteSize) {
7376 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7377 "RegLocs should be for ByVal argument.");
7378
7379 const CCValAssign RL = ArgLocs[I++];
7380 HandleRegLoc(RL.getLocReg(), Offset);
7381 }
7382
7383 if (Offset != StackSize) {
7384 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7385 "Expected MemLoc for remaining bytes.");
7386 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7387 // Consume the MemLoc. The InVal has already been emitted, so nothing
7388 // more needs to be done.
7389 ++I;
7390 }
7391
7392 continue;
7393 }
7394
7395 EVT ValVT = VA.getValVT();
7396 if (VA.isRegLoc() && !VA.needsCustom()) {
7397 MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7398 unsigned VReg =
7399 MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7400 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7401 if (ValVT.isScalarInteger() &&
7402 (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
7403 ArgValue =
7404 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7405 }
7406 InVals.push_back(ArgValue);
7407 continue;
7408 }
7409 if (VA.isMemLoc()) {
7410 const unsigned LocSize = LocVT.getStoreSize();
7411 const unsigned ValSize = ValVT.getStoreSize();
7412 assert((ValSize <= LocSize) &&
7413 "Object size is larger than size of MemLoc");
7414 int CurArgOffset = VA.getLocMemOffset();
7415 // Objects are right-justified because AIX is big-endian.
7416 if (LocSize > ValSize)
7417 CurArgOffset += LocSize - ValSize;
7418 // Potential tail calls could cause overwriting of argument stack slots.
7419 const bool IsImmutable =
7420 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7421 (CallConv == CallingConv::Fast));
7422 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7423 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7424 SDValue ArgValue =
7425 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7426 InVals.push_back(ArgValue);
7427 continue;
7428 }
7429 }
7430
7431 // On AIX a minimum of 8 words is saved to the parameter save area.
7432 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7433 // Area that is at least reserved in the caller of this function.
7434 unsigned CallerReservedArea =
7435 std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7436
7437 // Set the size that is at least reserved in caller of this function. Tail
7438 // call optimized function's reserved stack space needs to be aligned so
7439 // that taking the difference between two stack areas will result in an
7440 // aligned stack.
7441 CallerReservedArea =
7442 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7443 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7444 FuncInfo->setMinReservedArea(CallerReservedArea);
7445
7446 if (isVarArg) {
7447 FuncInfo->setVarArgsFrameIndex(
7448 MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7449 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7450
7451 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7452 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7453
7454 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7455 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7456 const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7457
7458 // The fixed integer arguments of a variadic function are stored to the
7459 // VarArgsFrameIndex on the stack so that they may be loaded by
7460 // dereferencing the result of va_next.
7461 for (unsigned GPRIndex =
7462 (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7463 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7464
7465 const unsigned VReg =
7466 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7467 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7468
7469 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7470 SDValue Store =
7471 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7472 MemOps.push_back(Store);
7473 // Increment the address for the next argument to store.
7474 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7475 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7476 }
7477 }
7478
7479 if (!MemOps.empty())
7480 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7481
7482 return Chain;
7483}
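// [Editor's sketch, not part of the original file] The reserved-area
// computation above in isolation: the caller always reserves the linkage
// area plus at least the 8-slot minimum parameter save area, growing it only
// when the actual argument area is larger.
unsigned callerReservedAreaAIX(unsigned NextStackOffset, bool IsPPC64) {
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  const unsigned LinkageSize = IsPPC64 ? 48 : 24;
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  const unsigned Floor = LinkageSize + MinParameterSaveArea;
  return NextStackOffset > Floor ? NextStackOffset : Floor;
}
// 32-bit: the floor is 24 + 32 = 56 bytes; 64-bit: 48 + 64 = 112 bytes.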
7484
7485SDValue PPCTargetLowering::LowerCall_AIX(
7486 SDValue Chain, SDValue Callee, CallFlags CFlags,
7487 const SmallVectorImpl<ISD::OutputArg> &Outs,
7488 const SmallVectorImpl<SDValue> &OutVals,
7489 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7490 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7491 const CallBase *CB) const {
7492 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7493 // AIX ABI stack frame layout.
7494
7495 assert((CFlags.CallConv == CallingConv::C ||
7496 CFlags.CallConv == CallingConv::Cold ||
7497 CFlags.CallConv == CallingConv::Fast) &&
7498 "Unexpected calling convention!");
7499
7500 if (CFlags.IsPatchPoint)
7501 report_fatal_error("This call type is unimplemented on AIX.");
7502
7503 const PPCSubtarget& Subtarget =
7504 static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7505 if (Subtarget.hasQPX())
7506 report_fatal_error("QPX is not supported on AIX.");
7507 if (Subtarget.hasAltivec())
7508 report_fatal_error("Altivec support is unimplemented on AIX.");
7509
7510 MachineFunction &MF = DAG.getMachineFunction();
7511 SmallVector<CCValAssign, 16> ArgLocs;
7512 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7513 *DAG.getContext());
7514
7515 // Reserve space for the linkage save area (LSA) on the stack.
7516 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7517 // [SP][CR][LR][2 x reserved][TOC].
7518 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7519 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7520 const bool IsPPC64 = Subtarget.isPPC64();
7521 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7522 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7523 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7524 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7525
7526 // The prolog code of the callee may store up to 8 GPR argument registers to
7527 // the stack, allowing va_start to index over them in memory if the callee
7528 // is variadic.
7529 // Because we cannot tell if this is needed on the caller side, we have to
7530 // conservatively assume that it is needed. As such, make sure we have at
7531 // least enough stack space for the caller to store the 8 GPRs.
7532 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7533 const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7534 CCInfo.getNextStackOffset());
7535
7536 // Adjust the stack pointer for the new arguments...
7537 // These operations are automatically eliminated by the prolog/epilog pass.
7538 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7539 SDValue CallSeqStart = Chain;
7540
7541 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7542 SmallVector<SDValue, 8> MemOpChains;
7543
7544 // Set up a copy of the stack pointer for loading and storing any
7545 // arguments that may not fit in the registers available for argument
7546 // passing.
7547 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7548 : DAG.getRegister(PPC::R1, MVT::i32);
7549
7550 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7551 const unsigned ValNo = ArgLocs[I].getValNo();
7552 SDValue Arg = OutVals[ValNo];
7553 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7554
7555 if (Flags.isByVal()) {
7556 const unsigned ByValSize = Flags.getByValSize();
7557
7558 // Nothing to do for zero-sized ByVals on the caller side.
7559 if (!ByValSize) {
7560 ++I;
7561 continue;
7562 }
7563
7564 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7565 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7566 (LoadOffset != 0)
7567 ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
7568 : Arg,
7569 MachinePointerInfo(), VT);
7570 };
7571
7572 unsigned LoadOffset = 0;
7573
7574 // Initialize registers, which are fully occupied by the by-val argument.
7575 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7576 SDValue Load = GetLoad(PtrVT, LoadOffset);
7577 MemOpChains.push_back(Load.getValue(1));
7578 LoadOffset += PtrByteSize;
7579 const CCValAssign &ByValVA = ArgLocs[I++];
7580 assert(ByValVA.getValNo() == ValNo &&
7581 "Unexpected location for pass-by-value argument.");
7582 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7583 }
7584
7585 if (LoadOffset == ByValSize)
7586 continue;
7587
7588 // There must be one more loc to handle the remainder.
7589 assert(ArgLocs[I].getValNo() == ValNo &&
7590 "Expected additional location for by-value argument.");
7591
7592 if (ArgLocs[I].isMemLoc()) {
7593 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7594 const CCValAssign &ByValVA = ArgLocs[I++];
7595 ISD::ArgFlagsTy MemcpyFlags = Flags;
7596 // Only memcpy the bytes that don't pass in registers.
7597 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7598 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7599 (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
7600 : Arg,
7601 DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
7602 CallSeqStart, MemcpyFlags, DAG, dl);
7603 continue;
7604 }
7605
7606 // Initialize the final register residue.
7607 // Any residue that occupies the final by-val arg register must be
7608 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7609 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7610 // 2 and 1 byte loads.
7611 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7612 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7613 "Unexpected register residue for by-value argument.");
7614 SDValue ResidueVal;
7615 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7616 const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7617 const MVT VT =
7618 N == 1 ? MVT::i8
7619 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7620 SDValue Load = GetLoad(VT, LoadOffset);
7621 MemOpChains.push_back(Load.getValue(1));
7622 LoadOffset += N;
7623 Bytes += N;
7624
7625 // By-val arguments are passed left-justified in registers.
7626 // Every load here needs to be shifted, otherwise a full register load
7627 // should have been used.
7628 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7629 "Unexpected load emitted during handling of pass-by-value "
7630 "argument.");
7631 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7632 EVT ShiftAmountTy =
7633 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7634 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7635 SDValue ShiftedLoad =
7636 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7637 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7638 ShiftedLoad)
7639 : ShiftedLoad;
7640 }
7641
7642 const CCValAssign &ByValVA = ArgLocs[I++];
7643 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7644 continue;
7645 }
7646
7647 CCValAssign &VA = ArgLocs[I++];
7648 const MVT LocVT = VA.getLocVT();
7649 const MVT ValVT = VA.getValVT();
7650
7651 switch (VA.getLocInfo()) {
7652 default:
7653 report_fatal_error("Unexpected argument extension type.");
7654 case CCValAssign::Full:
7655 break;
7656 case CCValAssign::ZExt:
7657 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7658 break;
7659 case CCValAssign::SExt:
7660 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7661 break;
7662 }
7663
7664 if (VA.isRegLoc() && !VA.needsCustom()) {
7665 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7666 continue;
7667 }
7668
7669 if (VA.isMemLoc()) {
7670 SDValue PtrOff =
7671 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7672 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7673 MemOpChains.push_back(
7674 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7675
7676 continue;
7677 }
7678
7679 // Custom handling is used for GPR initializations for vararg float
7680 // arguments.
7681 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7682 ValVT.isFloatingPoint() && LocVT.isInteger() &&
7683 "Unexpected register handling for calling convention.");
7684
7685 SDValue ArgAsInt =
7686 DAG.getBitcast(MVT::getIntegerVT(LocVT.getSizeInBits()), Arg);
7687
7688 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7689 // f32 in 32-bit GPR
7690 // f64 in 64-bit GPR
7691 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7692 else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
7693 // f32 in 64-bit GPR.
7694 RegsToPass.push_back(std::make_pair(
7695 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7696 else {
7697 // f64 in two 32-bit GPRs
7698 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7699 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7700 "Unexpected custom register for argument!");
7701 CCValAssign &GPR1 = VA;
7702 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7703 DAG.getConstant(32, dl, MVT::i8));
7704 RegsToPass.push_back(std::make_pair(
7705 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7706
7707 if (I != E) {
7708 // If only 1 GPR was available, there will only be one custom GPR and
7709 // the argument will also pass in memory.
7710 CCValAssign &PeekArg = ArgLocs[I];
7711 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7712 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7713 CCValAssign &GPR2 = ArgLocs[I++];
7714 RegsToPass.push_back(std::make_pair(
7715 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7716 }
7717 }
7718 }
7719 }
7720
7721 if (!MemOpChains.empty())
7722 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7723
7724 // For indirect calls, we need to save the TOC base to the stack for
7725 // restoration after the call.
7726 if (CFlags.IsIndirect) {
7727 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7728 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7729 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7730 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7731 const unsigned TOCSaveOffset =
7732 Subtarget.getFrameLowering()->getTOCSaveOffset();
7733
7734 setUsesTOCBasePtr(DAG);
7735 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7736 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7737 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7738 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7739 Chain = DAG.getStore(
7740 Val.getValue(1), dl, Val, AddPtr,
7741 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7742 }
7743
7744 // Build a sequence of copy-to-reg nodes chained together with token chain
7745 // and flag operands which copy the outgoing args into the appropriate regs.
7746 SDValue InFlag;
7747 for (auto Reg : RegsToPass) {
7748 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7749 InFlag = Chain.getValue(1);
7750 }
7751
7752 const int SPDiff = 0;
7753 return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7754 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7755}
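// [Editor's sketch, not part of the original file] The power-of-2 residue
// decomposition used for the final by-val register above, in isolation: the
// remainder is covered by descending power-of-2 loads, so a 7-byte residue
// becomes loads of 4, 2 and 1 bytes, each later shifted into a
// left-justified position.
#include <cstdio>

void printResidueLoads(unsigned ResidueBytes) {
  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
    unsigned N = 1; // PowerOf2Floor(ResidueBytes - Bytes), spelled out
    while (N * 2 <= ResidueBytes - Bytes)
      N *= 2;
    std::printf("load %u byte(s) at offset %u\n", N, Bytes);
    Bytes += N;
  }
}
// printResidueLoads(7) prints loads of 4, 2 and 1 bytes at offsets 0, 4, 6.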
7756
7757bool
7758PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7759 MachineFunction &MF, bool isVarArg,
7760 const SmallVectorImpl<ISD::OutputArg> &Outs,
7761 LLVMContext &Context) const {
7762 SmallVector<CCValAssign, 16> RVLocs;
7763 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7764 return CCInfo.CheckReturn(
7765 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7766 ? RetCC_PPC_Cold
7767 : RetCC_PPC);
7768}
7769
7770SDValue
7771PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7772 bool isVarArg,
7773 const SmallVectorImpl<ISD::OutputArg> &Outs,
7774 const SmallVectorImpl<SDValue> &OutVals,
7775 const SDLoc &dl, SelectionDAG &DAG) const {
7776 SmallVector<CCValAssign, 16> RVLocs;
7777 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7778 *DAG.getContext());
7779 CCInfo.AnalyzeReturn(Outs,
7780 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7781 ? RetCC_PPC_Cold
7782 : RetCC_PPC);
7783
7784 SDValue Flag;
7785 SmallVector<SDValue, 4> RetOps(1, Chain);
7786
7787 // Copy the result values into the output registers.
7788 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7789 CCValAssign &VA = RVLocs[i];
7790 assert(VA.isRegLoc() && "Can only return in registers!");
7791
7792 SDValue Arg = OutVals[RealResIdx];
7793
7794 switch (VA.getLocInfo()) {
7795 default: llvm_unreachable("Unknown loc info!");
7796 case CCValAssign::Full: break;
7797 case CCValAssign::AExt:
7798 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7799 break;
7800 case CCValAssign::ZExt:
7801 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7802 break;
7803 case CCValAssign::SExt:
7804 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7805 break;
7806 }
7807 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7808 bool isLittleEndian = Subtarget.isLittleEndian();
7809 // Legalize ret f64 -> ret 2 x i32.
7810 SDValue SVal =
7811 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7812 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7813 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7814 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7815 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7816 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7817 Flag = Chain.getValue(1);
7818 VA = RVLocs[++i]; // skip ahead to next loc
7819 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7820 } else
7821 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7822 Flag = Chain.getValue(1);
7823 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7824 }
7825
7826 RetOps[0] = Chain; // Update chain.
7827
7828 // Add the flag if we have it.
7829 if (Flag.getNode())
7830 RetOps.push_back(Flag);
7831
7832 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7833}
7834
7835SDValue
7836PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7837 SelectionDAG &DAG) const {
7838 SDLoc dl(Op);
7839
7840 // Get the correct type for integers.
7841 EVT IntVT = Op.getValueType();
7842
7843 // Get the inputs.
7844 SDValue Chain = Op.getOperand(0);
7845 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7846 // Build a DYNAREAOFFSET node.
7847 SDValue Ops[2] = {Chain, FPSIdx};
7848 SDVTList VTs = DAG.getVTList(IntVT);
7849 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7850}
7851
7852SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7853 SelectionDAG &DAG) const {
7854 // When we pop the dynamic allocation we need to restore the SP link.
7855 SDLoc dl(Op);
7856
7857 // Get the correct type for pointers.
7858 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7859
7860 // Construct the stack pointer operand.
7861 bool isPPC64 = Subtarget.isPPC64();
7862 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7863 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7864
7865 // Get the operands for the STACKRESTORE.
7866 SDValue Chain = Op.getOperand(0);
7867 SDValue SaveSP = Op.getOperand(1);
7868
7869 // Load the old link SP.
7870 SDValue LoadLinkSP =
7871 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7872
7873 // Restore the stack pointer.
7874 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7875
7876 // Store the old link SP.
7877 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7878}
7879
7880SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7881 MachineFunction &MF = DAG.getMachineFunction();
7882 bool isPPC64 = Subtarget.isPPC64();
7883 EVT PtrVT = getPointerTy(MF.getDataLayout());
7884
7885 // Get the current return address save index. The users of this index will
7886 // be primarily DYNALLOC instructions.
7887 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7888 int RASI = FI->getReturnAddrSaveIndex();
7889
7890 // If the return address save index hasn't been defined yet.
7891 if (!RASI) {
7892 // Find the fixed offset of the return address save area.
7893 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7894 // Allocate the frame index for the return address save area.
7895 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7896 // Save the result.
7897 FI->setReturnAddrSaveIndex(RASI);
7898 }
7899 return DAG.getFrameIndex(RASI, PtrVT);
7900}
7901
7902SDValue
7903PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7904 MachineFunction &MF = DAG.getMachineFunction();
7905 bool isPPC64 = Subtarget.isPPC64();
7906 EVT PtrVT = getPointerTy(MF.getDataLayout());
7907
7908 // Get current frame pointer save index. The users of this index will be
7909 // primarily DYNALLOC instructions.
7910 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7911 int FPSI = FI->getFramePointerSaveIndex();
7912
7913 // If the frame pointer save index hasn't been defined yet.
7914 if (!FPSI) {
7915 // Find the fixed offset of the frame pointer save area.
7916 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7917 // Allocate the frame index for the frame pointer save area.
7918 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7919 // Save the result.
7920 FI->setFramePointerSaveIndex(FPSI);
7921 }
7922 return DAG.getFrameIndex(FPSI, PtrVT);
7923}
7924
7925SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7926 SelectionDAG &DAG) const {
7927 MachineFunction &MF = DAG.getMachineFunction();
7928 // Get the inputs.
7929 SDValue Chain = Op.getOperand(0);
7930 SDValue Size = Op.getOperand(1);
7931 SDLoc dl(Op);
7932
7933 // Get the correct type for pointers.
7934 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7935 // Negate the size.
7936 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7937 DAG.getConstant(0, dl, PtrVT), Size);
7938 // Construct a node for the frame pointer save index.
7939 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7940 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7941 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7942 if (hasInlineStackProbe(MF))
7943 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7944 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7945}
7946
7947SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 MachineFunction &MF = DAG.getMachineFunction();
7950
7951 bool isPPC64 = Subtarget.isPPC64();
7952 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7953
7954 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7955 return DAG.getFrameIndex(FI, PtrVT);
7956}
7957
7958SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7959 SelectionDAG &DAG) const {
7960 SDLoc DL(Op);
7961 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7962 DAG.getVTList(MVT::i32, MVT::Other),
7963 Op.getOperand(0), Op.getOperand(1));
7964}
7965
7966SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7967 SelectionDAG &DAG) const {
7968 SDLoc DL(Op);
7969 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7970 Op.getOperand(0), Op.getOperand(1));
7971}
7972
7973SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7974 if (Op.getValueType().isVector())
7975 return LowerVectorLoad(Op, DAG);
7976
7977 assert(Op.getValueType() == MVT::i1 &&
7978 "Custom lowering only for i1 loads");
7979
7980 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7981
7982 SDLoc dl(Op);
7983 LoadSDNode *LD = cast<LoadSDNode>(Op);
7984
7985 SDValue Chain = LD->getChain();
7986 SDValue BasePtr = LD->getBasePtr();
7987 MachineMemOperand *MMO = LD->getMemOperand();
7988
7989 SDValue NewLD =
7990 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7991 BasePtr, MVT::i8, MMO);
7992 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7993
7994 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7995 return DAG.getMergeValues(Ops, dl);
7996}
7997
7998SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7999 if (Op.getOperand(1).getValueType().isVector())
8000 return LowerVectorStore(Op, DAG);
8001
8002 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8003 "Custom lowering only for i1 stores");
8004
8005 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8006
8007 SDLoc dl(Op);
8008 StoreSDNode *ST = cast<StoreSDNode>(Op);
8009
8010 SDValue Chain = ST->getChain();
8011 SDValue BasePtr = ST->getBasePtr();
8012 SDValue Value = ST->getValue();
8013 MachineMemOperand *MMO = ST->getMemOperand();
8014
8015 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8016 Value);
8017 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8018}
8019
8020// FIXME: Remove this once the ANDI glue bug is fixed:
8021SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8022 assert(Op.getValueType() == MVT::i1 &&
8023 "Custom lowering only for i1 results");
8024
8025 SDLoc DL(Op);
8026 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8027}
8028
8029SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8030 SelectionDAG &DAG) const {
8031
8032 // Implements a vector truncate that fits in a vector register as a shuffle.
8033 // We want to legalize vector truncates down to where the source fits in
8034 // a vector register (and target is therefore smaller than vector register
8035 // size). At that point legalization will try to custom lower the sub-legal
8036 // result and get here - where we can contain the truncate as a single target
8037 // operation.
8038
8039 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8040 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8041 //
8042 // We will implement it for big-endian ordering as this (where x denotes
8043 // undefined):
8044 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8045 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8046 //
8047 // The same operation in little-endian ordering will be:
8048 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8049 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8050
8051 assert(Op.getValueType().isVector() && "Vector type expected.");
8052
8053 SDLoc DL(Op);
8054 SDValue N1 = Op.getOperand(0);
8055 unsigned SrcSize = N1.getValueType().getSizeInBits();
8056 assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
8057 SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8058
8059 EVT TrgVT = Op.getValueType();
8060 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8061 EVT EltVT = TrgVT.getVectorElementType();
8062 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8063 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8064
8065 // First list the elements we want to keep.
8066 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8067 SmallVector<int, 16> ShuffV;
8068 if (Subtarget.isLittleEndian())
8069 for (unsigned i = 0; i < TrgNumElts; ++i)
8070 ShuffV.push_back(i * SizeMult);
8071 else
8072 for (unsigned i = 1; i <= TrgNumElts; ++i)
8073 ShuffV.push_back(i * SizeMult - 1);
8074
8075 // Populate the remaining elements with undefs.
8076 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8077 // Any index into the undef second shuffle operand marks the lane undefined.
8078 ShuffV.push_back(WideNumElts + 1);
8079
8080 SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
8081 return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
8082}
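// [Editor's sketch, not part of the original file] The mask construction
// above, extracted so a concrete case can be checked: truncating v2i16 to
// v2i8 widened into a 16 x i8 vector keeps the LSB byte of each element.
#include <vector>

std::vector<int> truncShuffleMask(unsigned TrgNumElts, unsigned SizeMult,
                                  unsigned WideNumElts, bool LittleEndian) {
  std::vector<int> ShuffV;
  if (LittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1); // out-of-range index == undef lane
  return ShuffV;
}
// truncShuffleMask(2, 2, 16, /*LittleEndian=*/false) yields {1, 3, 17, ...}:
// the low byte of each big-endian i16 element, the remaining lanes undef.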
8083
8084/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8085/// possible.
8086SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8087 // Not FP? Not a fsel.
8088 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
8089 !Op.getOperand(2).getValueType().isFloatingPoint())
8090 return Op;
8091
8092 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8093
8094 EVT ResVT = Op.getValueType();
8095 EVT CmpVT = Op.getOperand(0).getValueType();
8096 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8097 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8098 SDLoc dl(Op);
8099 SDNodeFlags Flags = Op.getNode()->getFlags();
8100
8101 // We have xsmaxcdp/xsmincdp which are OK to emit even in the
8102 // presence of infinities.
8103 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8104 switch (CC) {
8105 default:
8106 break;
8107 case ISD::SETOGT:
8108 case ISD::SETGT:
8109 return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8110 case ISD::SETOLT:
8111 case ISD::SETLT:
8112 return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8113 }
8114 }
8115
8116 // We might be able to do better than this under some circumstances, but in
8117 // general, fsel-based lowering of select is a finite-math-only optimization.
8118 // For more information, see section F.3 of the 2.06 ISA specification.
8119 // With ISA 3.0, the xsmaxcdp/xsmincdp cases were already handled above.
8120 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8121 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8122 return Op;
8123
8124 // If the RHS of the comparison is a 0.0, we don't need to do the
8125 // subtraction at all.
8126 SDValue Sel1;
8127 if (isFloatingPointZero(RHS))
8128 switch (CC) {
8129 default: break; // SETUO etc aren't handled by fsel.
8130 case ISD::SETNE:
8131 std::swap(TV, FV);
8132 LLVM_FALLTHROUGH;
8133 case ISD::SETEQ:
8134 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8135 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8136 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8137 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8138 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8139 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8140 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8141 case ISD::SETULT:
8142 case ISD::SETLT:
8143 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8144 LLVM_FALLTHROUGH;
8145 case ISD::SETOGE:
8146 case ISD::SETGE:
8147 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8148 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8149 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8150 case ISD::SETUGT:
8151 case ISD::SETGT:
8152 std::swap(TV, FV); // fsel is natively setge, swap operands for setle
8153 LLVM_FALLTHROUGH;
8154 case ISD::SETOLE:
8155 case ISD::SETLE:
8156 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8157 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8158 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8159 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8160 }
8161
8162 SDValue Cmp;
8163 switch (CC) {
8164 default: break; // SETUO etc aren't handled by fsel.
8165 case ISD::SETNE:
8166 std::swap(TV, FV);
8167 LLVM_FALLTHROUGH;
8168 case ISD::SETEQ:
8169 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8170 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8171 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8172 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8173 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8174 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8175 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8176 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8177 case ISD::SETULT:
8178 case ISD::SETLT:
8179 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8180 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8181 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8182 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8183 case ISD::SETOGE:
8184 case ISD::SETGE:
8185 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8186 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8187 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8188 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8189 case ISD::SETUGT:
8190 case ISD::SETGT:
8191 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8192 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8193 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8194 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8195 case ISD::SETOLE:
8196 case ISD::SETLE:
8197 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8198 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8199 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8200 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8201 }
8202 return Op;
8203}
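// [Editor's sketch, not part of the original file] A scalar model of the
// fsel pattern above: fsel(c, a, b) selects a when c >= 0 (including -0.0)
// and b otherwise, so "lhs >= rhs" becomes fsel(lhs - rhs, tv, fv) and
// "lhs < rhs" is the same comparison with tv/fv swapped. This is only sound
// when lhs - rhs can be neither NaN nor a misbehaving infinity, hence the
// no-NaNs/no-infs gating above.
double fselModel(double C, double A, double B) { return C >= 0.0 ? A : B; }

double selectGE(double LHS, double RHS, double TV, double FV) {
  return fselModel(LHS - RHS, TV, FV);
}
double selectLT(double LHS, double RHS, double TV, double FV) {
  return fselModel(LHS - RHS, FV, TV); // the std::swap(TV, FV) above
}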
8204
8205void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8206 SelectionDAG &DAG,
8207 const SDLoc &dl) const {
8208 assert(Op.getOperand(0).getValueType().isFloatingPoint());
8209 SDValue Src = Op.getOperand(0);
8210 if (Src.getValueType() == MVT::f32)
8211 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8212
8213 SDValue Tmp;
8214 switch (Op.getSimpleValueType().SimpleTy) {
8215 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8216 case MVT::i32:
8217 Tmp = DAG.getNode(
8218 Op.getOpcode() == ISD::FP_TO_SINT
8219 ? PPCISD::FCTIWZ
8220 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
8221 dl, MVT::f64, Src);
8222 break;
8223 case MVT::i64:
8224 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
8225 "i64 FP_TO_UINT is supported only with FPCVT");
8226 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
8227 PPCISD::FCTIDUZ,
8228 dl, MVT::f64, Src);
8229 break;
8230 }
8231
8232 // Convert the FP value to an int value through memory.
8233 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8234 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
8235 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8236 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8237 MachinePointerInfo MPI =
8238 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8239
8240 // Emit a store to the stack slot.
8241 SDValue Chain;
8242 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8243 if (i32Stack) {
8244 MachineFunction &MF = DAG.getMachineFunction();
8245 Alignment = Align(4);
8246 MachineMemOperand *MMO =
8247 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8248 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
8249 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8250 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8251 } else
8252 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment);
8253
8254 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8255 // add in a bias on big endian.
8256 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8257 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8258 DAG.getConstant(4, dl, FIPtr.getValueType()));
8259 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8260 }
8261
8262 RLI.Chain = Chain;
8263 RLI.Ptr = FIPtr;
8264 RLI.MPI = MPI;
8265 RLI.Alignment = Alignment;
8266}
8267
8268/// Custom lowers floating point to integer conversions to use
8269/// the direct move instructions available in ISA 2.07 to avoid the
8270/// need for load/store combinations.
8271SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8272 SelectionDAG &DAG,
8273 const SDLoc &dl) const {
8274 assert(Op.getOperand(0).getValueType().isFloatingPoint());
8275 SDValue Src = Op.getOperand(0);
8276
8277 if (Src.getValueType() == MVT::f32)
8278 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8279
8280 SDValue Tmp;
8281 switch (Op.getSimpleValueType().SimpleTy) {
8282 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8283 case MVT::i32:
8284 Tmp = DAG.getNode(
8285 Op.getOpcode() == ISD::FP_TO_SINT
8286 ? PPCISD::FCTIWZ
8287 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
8288 dl, MVT::f64, Src);
8289 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
8290 break;
8291 case MVT::i64:
8292 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
8293 "i64 FP_TO_UINT is supported only with FPCVT");
8294 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
8295 PPCISD::FCTIDUZ,
8296 dl, MVT::f64, Src);
8297 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
8298 break;
8299 }
8300 return Tmp;
8301}
8302
8303SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8304 const SDLoc &dl) const {
8305
8306 // FP to INT conversions are legal for f128.
8307 if (Op->getOperand(0).getValueType() == MVT::f128)
8308 return Op;
8309
8310 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8311 // PPC (the libcall is not available).
8312 if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
8313 if (Op.getValueType() == MVT::i32) {
8314 if (Op.getOpcode() == ISD::FP_TO_SINT) {
8315 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8316 MVT::f64, Op.getOperand(0),
8317 DAG.getIntPtrConstant(0, dl));
8318 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
8319 MVT::f64, Op.getOperand(0),
8320 DAG.getIntPtrConstant(1, dl));
8321
8322 // Add the two halves of the long double in round-to-zero mode.
8323 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8324
8325 // Now use a smaller FP_TO_SINT.
8326 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8327 }
8328 if (Op.getOpcode() == ISD::FP_TO_UINT) {
8329 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8330 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8331 SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
8332 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8333 // FIXME: generated code sucks.
8334 // TODO: Are there fast-math-flags to propagate to this FSUB?
8335 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
8336 Op.getOperand(0), Tmp);
8337 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8338 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
8339 DAG.getConstant(0x80000000, dl, MVT::i32));
8340 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
8341 Op.getOperand(0));
8342 return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
8343 ISD::SETGE);
8344 }
8345 }
8346
8347 return SDValue();
8348 }
8349
8350 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8351 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8352
8353 ReuseLoadInfo RLI;
8354 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8355
8356 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8357 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8358}
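// [Editor's sketch, not part of the original file] The unsigned-conversion
// identity used for the ppcf128 FP_TO_UINT case above, modeled on double:
// values at or above 2^31 are shifted down into signed range, converted, and
// then rebiased by 0x80000000.
#include <cstdint>

uint32_t fpToUint32Model(double X) {
  const double TwoE31 = 2147483648.0; // 2^31
  if (X >= TwoE31)
    return static_cast<uint32_t>(static_cast<int32_t>(X - TwoE31)) +
           0x80000000u;
  return static_cast<uint32_t>(static_cast<int32_t>(X));
}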
8359
8360// We're trying to insert a regular store, S, and then a load, L. If the
8361// incoming value, O, is a load, we might just be able to have our load use the
8362// address used by O. However, we don't know if anything else will store to
8363// that address before we can load from it. To prevent this situation, we need
8364// to insert our load, L, into the chain as a peer of O. To do this, we give L
8365// the same chain operand as O, we create a token factor from the chain results
8366// of O and L, and we replace all uses of O's chain result with that token
8367// factor (see spliceIntoChain below for this last part).
8368bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8369 ReuseLoadInfo &RLI,
8370 SelectionDAG &DAG,
8371 ISD::LoadExtType ET) const {
8372 SDLoc dl(Op);
8373 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8374 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8375 if (ET == ISD::NON_EXTLOAD &&
8376 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8377 isOperationLegalOrCustom(Op.getOpcode(),
8378 Op.getOperand(0).getValueType())) {
8379
8380 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8381 return true;
8382 }
8383
8384 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8385 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8386 LD->isNonTemporal())
8387 return false;
8388 if (LD->getMemoryVT() != MemVT)
8389 return false;
8390
8391 RLI.Ptr = LD->getBasePtr();
8392 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8393 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8394 "Non-pre-inc AM on PPC?");
8395 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8396 LD->getOffset());
8397 }
8398
8399 RLI.Chain = LD->getChain();
8400 RLI.MPI = LD->getPointerInfo();
8401 RLI.IsDereferenceable = LD->isDereferenceable();
8402 RLI.IsInvariant = LD->isInvariant();
8403 RLI.Alignment = LD->getAlign();
8404 RLI.AAInfo = LD->getAAInfo();
8405 RLI.Ranges = LD->getRanges();
8406
8407 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8408 return true;
8409}
8410
8411// Given the head of the old chain, ResChain, insert a token factor containing
8412// it and NewResChain, and make users of ResChain now be users of that token
8413// factor.
8414// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8415void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8416 SDValue NewResChain,
8417 SelectionDAG &DAG) const {
8418 if (!ResChain)
8419 return;
8420
8421 SDLoc dl(NewResChain);
8422
8423 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8424 NewResChain, DAG.getUNDEF(MVT::Other));
8425 assert(TF.getNode() != NewResChain.getNode() &&
8426 "A new TF really is required here");
8427
8428 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8429 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8430}
8431
8432/// Analyze the profitability of a direct move: prefer a float load over an
8433/// int load plus a direct move when the loaded integer value has no
8434/// integer uses.
8435bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8436 SDNode *Origin = Op.getOperand(0).getNode();
8437 if (Origin->getOpcode() != ISD::LOAD)
8438 return true;
8439
8440 // If there is no LXSIBZX/LXSIHZX, like Power8,
8441 // prefer direct move if the memory size is 1 or 2 bytes.
8442 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8443 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8444 return true;
8445
8446 for (SDNode::use_iterator UI = Origin->use_begin(),
8447 UE = Origin->use_end();
8448 UI != UE; ++UI) {
8449
8450 // Only look at the users of the loaded value.
8451 if (UI.getUse().get().getResNo() != 0)
8452 continue;
8453
8454 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8455 UI->getOpcode() != ISD::UINT_TO_FP)
8456 return true;
8457 }
8458
8459 return false;
8460}
8461
8462/// Custom lowers integer to floating point conversions to use
8463/// the direct move instructions available in ISA 2.07 to avoid the
8464/// need for load/store combinations.
8465SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8466 SelectionDAG &DAG,
8467 const SDLoc &dl) const {
8468 assert((Op.getValueType() == MVT::f32 ||
8469 Op.getValueType() == MVT::f64) &&
8470 "Invalid floating point type as target of conversion");
8471 assert(Subtarget.hasFPCVT() &&
8472 "Int to FP conversions with direct moves require FPCVT");
8473 SDValue FP;
8474 SDValue Src = Op.getOperand(0);
8475 bool SinglePrec = Op.getValueType() == MVT::f32;
8476 bool WordInt = Src.getSimpleValueType() == MVT::i32;
8477 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
8478 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
8479 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
8480
8481 if (WordInt) {
8482 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
8483 dl, MVT::f64, Src);
8484 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
8485 }
8486 else {
8487 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
8488 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
8489 }
8490
8491 return FP;
8492}
8493
8494static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8495
8496 EVT VecVT = Vec.getValueType();
8497 assert(VecVT.isVector() && "Expected a vector type.");
8498 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8499
8500 EVT EltVT = VecVT.getVectorElementType();
8501 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8502 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8503
8504 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8505 SmallVector<SDValue, 16> Ops(NumConcat);
8506 Ops[0] = Vec;
8507 SDValue UndefVec = DAG.getUNDEF(VecVT);
8508 for (unsigned i = 1; i < NumConcat; ++i)
8509 Ops[i] = UndefVec;
8510
8511 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8512}
8513
8514SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8515 const SDLoc &dl) const {
8516
8517 unsigned Opc = Op.getOpcode();
8518 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
8519 "Unexpected conversion type");
8520 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8521 "Supports conversions to v2f64/v4f32 only.");
8522
8523 bool SignedConv = Opc == ISD::SINT_TO_FP;
8524 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8525
8526 SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
8527 EVT WideVT = Wide.getValueType();
8528 unsigned WideNumElts = WideVT.getVectorNumElements();
8529 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8530
8531 SmallVector<int, 16> ShuffV;
8532 for (unsigned i = 0; i < WideNumElts; ++i)
8533 ShuffV.push_back(i + WideNumElts);
8534
8535 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8536 int SaveElts = FourEltRes ? 4 : 2;
8537 if (Subtarget.isLittleEndian())
8538 for (int i = 0; i < SaveElts; i++)
8539 ShuffV[i * Stride] = i;
8540 else
8541 for (int i = 1; i <= SaveElts; i++)
8542 ShuffV[i * Stride - 1] = i - 1;
8543
8544 SDValue ShuffleSrc2 =
8545 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8546 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8547
8548 SDValue Extend;
8549 if (SignedConv) {
8550 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8551 EVT ExtVT = Op.getOperand(0).getValueType();
8552 if (Subtarget.hasP9Altivec())
8553 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8554 IntermediateVT.getVectorNumElements());
8555
8556 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8557 DAG.getValueType(ExtVT));
8558 } else
8559 Extend = DAG.getNode(ISD::ZERO_EXTEND, dl, IntermediateVT, Arrange);
8560
8561 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8562}
8563
8564SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8565 SelectionDAG &DAG) const {
8566 SDLoc dl(Op);
8567
8568 EVT InVT = Op.getOperand(0).getValueType();
8569 EVT OutVT = Op.getValueType();
8570 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8571 isOperationCustom(Op.getOpcode(), InVT))
8572 return LowerINT_TO_FPVector(Op, DAG, dl);
8573
8574 // Conversions to f128 are legal.
8575 if (Op.getValueType() == MVT::f128)
8576 return Op;
8577
8578 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
8579 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
8580 return SDValue();
8581
8582 SDValue Value = Op.getOperand(0);
8583 // The values are now known to be -1 (false) or 1 (true). To convert this
8584 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
8585 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
8586 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
8587
8588 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
8589
8590 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
8591
8592 if (Op.getValueType() != MVT::v4f64)
8593 Value = DAG.getNode(ISD::FP_ROUND, dl,
8594 Op.getValueType(), Value,
8595 DAG.getIntPtrConstant(1, dl));
8596 return Value;
8597 }
8598
8599 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8600 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8601 return SDValue();
8602
8603 if (Op.getOperand(0).getValueType() == MVT::i1)
8604 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
8605 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8606 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8607
8608 // If we have direct moves, we can do all the conversion, skip the store/load
8609 // however, without FPCVT we can't do most conversions.
8610 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8611 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8612 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8613
8614 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
8615 "UINT_TO_FP is supported only with FPCVT");
8616
8617 // If we have FCFIDS, then use it when converting to single-precision.
8618 // Otherwise, convert to double-precision and then round.
8619 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
8620 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
8621 : PPCISD::FCFIDS)
8622 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
8623 : PPCISD::FCFID);
8624 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
8625 ? MVT::f32
8626 : MVT::f64;
8627
8628 if (Op.getOperand(0).getValueType() == MVT::i64) {
8629 SDValue SINT = Op.getOperand(0);
8630 // When converting to single-precision, we actually need to convert
8631 // to double-precision first and then round to single-precision.
8632 // To avoid double-rounding effects during that operation, we have
8633 // to prepare the input operand. Bits that might be truncated when
8634 // converting to double-precision are replaced by a bit that won't
8635 // be lost at this stage, but is below the single-precision rounding
8636 // position.
8637 //
8638 // However, if -enable-unsafe-fp-math is in effect, accept double
8639 // rounding to avoid the extra overhead.
8640 if (Op.getValueType() == MVT::f32 &&
8641 !Subtarget.hasFPCVT() &&
8642 !DAG.getTarget().Options.UnsafeFPMath) {
8643
8644 // Twiddle input to make sure the low 11 bits are zero. (If this
8645 // is the case, we are guaranteed the value will fit into the 53 bit
8646 // mantissa of an IEEE double-precision value without rounding.)
8647 // If any of those low 11 bits were not zero originally, make sure
8648 // bit 12 (value 2048) is set instead, so that the final rounding
8649 // to single-precision gets the correct result.
8650 SDValue Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8651 SINT, DAG.getConstant(2047, dl, MVT::i64));
8652 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8653 Round, DAG.getConstant(2047, dl, MVT::i64));
8654 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8655 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8656 Round, DAG.getConstant(-2048, dl, MVT::i64));
8657
8658 // However, we cannot use that value unconditionally: if the magnitude
8659 // of the input value is small, the bit-twiddling we did above might
8660 // end up visibly changing the output. Fortunately, in that case, we
8661 // don't need to twiddle bits since the original input will convert
8662 // exactly to double-precision floating-point already. Therefore,
8663 // construct a conditional to use the original value if the top 11
8664 // bits are all sign-bit copies, and use the rounded value computed
8665 // above otherwise.
8666 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8667 SINT, DAG.getConstant(53, dl, MVT::i32));
8668 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8669 Cond, DAG.getConstant(1, dl, MVT::i64));
8670 Cond = DAG.getSetCC(
8671 dl,
8672 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8673 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8674
8675 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8676 }
8677
8678 ReuseLoadInfo RLI;
8679 SDValue Bits;
8680
8681 MachineFunction &MF = DAG.getMachineFunction();
8682 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8683 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8684 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8685 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8686 } else if (Subtarget.hasLFIWAX() &&
8687 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8688 MachineMemOperand *MMO =
8689 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8690 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8691 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8692 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8693 DAG.getVTList(MVT::f64, MVT::Other),
8694 Ops, MVT::i32, MMO);
8695 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8696 } else if (Subtarget.hasFPCVT() &&
8697 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8698 MachineMemOperand *MMO =
8699 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8700 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8701 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8702 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8703 DAG.getVTList(MVT::f64, MVT::Other),
8704 Ops, MVT::i32, MMO);
8705 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8706 } else if (((Subtarget.hasLFIWAX() &&
8707 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8708 (Subtarget.hasFPCVT() &&
8709 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8710 SINT.getOperand(0).getValueType() == MVT::i32) {
8711 MachineFrameInfo &MFI = MF.getFrameInfo();
8712 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8713
8714 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8715 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8716
8717 SDValue Store =
8718 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
8719 MachinePointerInfo::getFixedStack(
8720 DAG.getMachineFunction(), FrameIdx));
8721
8722 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8723 "Expected an i32 store");
8724
8725 RLI.Ptr = FIdx;
8726 RLI.Chain = Store;
8727 RLI.MPI =
8728 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8729 RLI.Alignment = Align(4);
8730
8731 MachineMemOperand *MMO =
8732 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8733 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8734 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8735 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8736 PPCISD::LFIWZX : PPCISD::LFIWAX,
8737 dl, DAG.getVTList(MVT::f64, MVT::Other),
8738 Ops, MVT::i32, MMO);
8739 } else
8740 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8741
8742 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
8743
8744 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
8745 FP = DAG.getNode(ISD::FP_ROUND, dl,
8746 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
8747 return FP;
8748 }
8749
8750 assert(Op.getOperand(0).getValueType() == MVT::i32 &&
8751 "Unhandled INT_TO_FP type in custom expander!");
8752 // Since we only generate this in 64-bit mode, we can take advantage of
8753 // 64-bit registers. In particular, sign extend the input value into the
8754 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8755 // then lfd it and fcfid it.
8756 MachineFunction &MF = DAG.getMachineFunction();
8757 MachineFrameInfo &MFI = MF.getFrameInfo();
8758 EVT PtrVT = getPointerTy(MF.getDataLayout());
8759
8760 SDValue Ld;
8761 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8762 ReuseLoadInfo RLI;
8763 bool ReusingLoad;
8764 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
8765 DAG))) {
8766 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8767 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8768
8769 SDValue Store =
8770 DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
8771 MachinePointerInfo::getFixedStack(
8772 DAG.getMachineFunction(), FrameIdx));
8773
8774 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8775 "Expected an i32 store");
8776
8777 RLI.Ptr = FIdx;
8778 RLI.Chain = Store;
8779 RLI.MPI =
8780 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8781 RLI.Alignment = Align(4);
8782 }
8783
8784 MachineMemOperand *MMO =
8785 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8786 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8787 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8788 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
8789 PPCISD::LFIWZX : PPCISD::LFIWAX,
8790 dl, DAG.getVTList(MVT::f64, MVT::Other),
8791 Ops, MVT::i32, MMO);
8792 if (ReusingLoad)
8793 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8794 } else {
8795 assert(Subtarget.isPPC64() &&
8796 "i32->FP without LFIWAX supported only on PPC64");
8797
8798 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8799 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8800
8801 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
8802 Op.getOperand(0));
8803
8804 // STD the extended value into the stack slot.
8805 SDValue Store = DAG.getStore(
8806 DAG.getEntryNode(), dl, Ext64, FIdx,
8807 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8808
8809 // Load the value as a double.
8810 Ld = DAG.getLoad(
8811 MVT::f64, dl, Store, FIdx,
8812 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8813 }
8814
8815 // FCFID it and return it.
8816 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
8817 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
8818 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8819 DAG.getIntPtrConstant(0, dl));
8820 return FP;
8821}
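// [Editor's sketch, not part of the original file] The double-rounding guard
// above, in isolation. It mirrors the ADD/ADD/OR/AND node sequence: bias the
// value, OR the original back in as a sticky bit, then clear the low 11 bits
// so the i64 -> f64 conversion is exact and the later f64 -> f32 rounding
// still sees whether any low bits were set.
#include <cstdint>

int64_t stickyRoundInput(int64_t SInt) {
  int64_t Round = SInt + 2047;
  Round += 2047;
  Round |= SInt;           // sticky: remember nonzero low bits
  Round &= ~int64_t(2047); // clear the low 11 bits (== AND with -2048)
  return Round;
}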
8822
8823SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8824 SelectionDAG &DAG) const {
8825 SDLoc dl(Op);
8826 /*
8827 The rounding mode is in bits 30:31 of the FPSCR, and has the following
8828 settings:
8829 00 Round to nearest
8830 01 Round to 0
8831 10 Round to +inf
8832 11 Round to -inf
8833
8834 FLT_ROUNDS, on the other hand, expects the following:
8835 -1 Undefined
8836 0 Round to 0
8837 1 Round to nearest
8838 2 Round to +inf
8839 3 Round to -inf
8840
8841 To perform the conversion, we do:
8842 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8843 */
8844
8845 MachineFunction &MF = DAG.getMachineFunction();
8846 EVT VT = Op.getValueType();
8847 EVT PtrVT = getPointerTy(MF.getDataLayout());
8848
8849 // Save FP Control Word to register
8850 SDValue Chain = Op.getOperand(0);
8851 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8852 Chain = MFFS.getValue(1);
8853
8854 // Save FP register to stack slot
8855 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8856 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8857 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8858
8859 // Load FP Control Word from low 32 bits of stack slot.
8860 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8861 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8862 SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8863 Chain = CWD.getValue(1);
8864
8865 // Transform as necessary
8866 SDValue CWD1 =
8867 DAG.getNode(ISD::AND, dl, MVT::i32,
8868 CWD, DAG.getConstant(3, dl, MVT::i32));
8869 SDValue CWD2 =
8870 DAG.getNode(ISD::SRL, dl, MVT::i32,
8871 DAG.getNode(ISD::AND, dl, MVT::i32,
8872 DAG.getNode(ISD::XOR, dl, MVT::i32,
8873 CWD, DAG.getConstant(3, dl, MVT::i32)),
8874 DAG.getConstant(3, dl, MVT::i32)),
8875 DAG.getConstant(1, dl, MVT::i32));
8876
8877 SDValue RetVal =
8878 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8879
8880 RetVal =
8881 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8882 dl, VT, RetVal);
8883
8884 return DAG.getMergeValues({RetVal, Chain}, dl);
8885}
8886
8887SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8888 EVT VT = Op.getValueType();
8889 unsigned BitWidth = VT.getSizeInBits();
8890 SDLoc dl(Op);
8891 assert(Op.getNumOperands() == 3 &&
8892 VT == Op.getOperand(1).getValueType() &&
8893 "Unexpected SHL!");
8894
8895 // Expand into a bunch of logical ops. Note that these ops
8896 // depend on the PPC behavior for oversized shift amounts.
8897 SDValue Lo = Op.getOperand(0);
8898 SDValue Hi = Op.getOperand(1);
8899 SDValue Amt = Op.getOperand(2);
8900 EVT AmtVT = Amt.getValueType();
8901
8902 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8903 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8904 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8905 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8906 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8907 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8908 DAG.getConstant(-BitWidth, dl, AmtVT));
8909 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8910 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8911 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8912 SDValue OutOps[] = { OutLo, OutHi };
8913 return DAG.getMergeValues(OutOps, dl);
8914}
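// Worked example (illustrative sketch): an i64 shift assembled from two i32
// halves, with BitWidth == 32 and Amt == 40, assuming the PPC shift semantics
// relied on above (slw/srw/sld-style shifts produce zero once the effective
// amount reaches [32, 63]):
//   Tmp2 = Hi << 40        -> 0
//   Tmp3 = Lo >> (32 - 40) -> 0        (the negative amount lands in [32, 63])
//   Tmp6 = Lo << (40 - 32) -> Lo << 8
//   OutHi = 0 | 0 | (Lo << 8), OutLo = Lo << 40 -> 0
// which matches a true 64-bit left shift by 40.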
8915
8916SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8917 EVT VT = Op.getValueType();
8918 SDLoc dl(Op);
8919 unsigned BitWidth = VT.getSizeInBits();
8920 assert(Op.getNumOperands() == 3 &&
8921 VT == Op.getOperand(1).getValueType() &&
8922 "Unexpected SRL!");
8923
8924 // Expand into a bunch of logical ops. Note that these ops
8925 // depend on the PPC behavior for oversized shift amounts.
8926 SDValue Lo = Op.getOperand(0);
8927 SDValue Hi = Op.getOperand(1);
8928 SDValue Amt = Op.getOperand(2);
8929 EVT AmtVT = Amt.getValueType();
8930
8931 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8932 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8933 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8934 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8935 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8936 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8937 DAG.getConstant(-BitWidth, dl, AmtVT));
8938 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8939 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8940 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8941 SDValue OutOps[] = { OutLo, OutHi };
8942 return DAG.getMergeValues(OutOps, dl);
8943}
8944
8945SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8946 SDLoc dl(Op);
8947 EVT VT = Op.getValueType();
8948 unsigned BitWidth = VT.getSizeInBits();
8949 assert(Op.getNumOperands() == 3 &&
8950 VT == Op.getOperand(1).getValueType() &&
8951 "Unexpected SRA!");
8952
8953 // Expand into a bunch of logical ops, followed by a select_cc.
8954 SDValue Lo = Op.getOperand(0);
8955 SDValue Hi = Op.getOperand(1);
8956 SDValue Amt = Op.getOperand(2);
8957 EVT AmtVT = Amt.getValueType();
8958
8959 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8960 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8961 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8962 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8963 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8964 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8965 DAG.getConstant(-BitWidth, dl, AmtVT));
8966 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8967 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8968 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8969 Tmp4, Tmp6, ISD::SETLE);
8970 SDValue OutOps[] = { OutLo, OutHi };
8971 return DAG.getMergeValues(OutOps, dl);
8972}
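// Illustrative note on the select above: Tmp5 = Amt - BitWidth decides which
// candidate forms the low word. For Amt <= BitWidth (Tmp5 <= 0) it is
// Tmp4 = (Lo >> Amt) | (Hi << (BitWidth - Amt)); for larger amounts it must
// be the arithmetic-shift result Tmp6 = Hi >> (Amt - BitWidth), whose
// sign-filled bits a plain OR of the two candidates could not express.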
8973
8974//===----------------------------------------------------------------------===//
8975// Vector related lowering.
8976//
8977
8978/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8979/// element size of SplatSize. Cast the result to VT.
8980static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8981 SelectionDAG &DAG, const SDLoc &dl) {
8982 static const MVT VTys[] = { // canonical VT to use for each size.
8983 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8984 };
8985
8986 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8987
8988 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8989 if (Val == ((1LU << (SplatSize * 8)) - 1)) {
8990 SplatSize = 1;
8991 Val = 0xFF;
8992 }
8993
8994 EVT CanonicalVT = VTys[SplatSize-1];
8995
8996 // Build a canonical splat for this value.
8997 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8998}
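// Illustrative example: a v8i16 splat of 0xFFFF hits the all-ones case above
// (0xFFFF == (1 << 16) - 1 for SplatSize == 2), so it is rewritten as a
// one-byte splat of 0xFF, built in the canonical v16i8 type and bitcast to
// the requested VT; every all-ones splat therefore shares one canonical form.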
8999
9000/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9001/// specified intrinsic ID.
9002static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9003 const SDLoc &dl, EVT DestVT = MVT::Other) {
9004 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9005 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9006 DAG.getConstant(IID, dl, MVT::i32), Op);
9007}
9008
9009/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9010/// specified intrinsic ID.
9011static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9012 SelectionDAG &DAG, const SDLoc &dl,
9013 EVT DestVT = MVT::Other) {
9014 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9015 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9016 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9017}
9018
9019/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9020/// specified intrinsic ID.
9021static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9022 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9023 EVT DestVT = MVT::Other) {
9024 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9025 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9026 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9027}
9028
9029/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9030/// amount. The result has the specified value type.
9031static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9032 SelectionDAG &DAG, const SDLoc &dl) {
9033 // Force LHS/RHS to be the right type.
9034 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9035 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9036
9037 int Ops[16];
9038 for (unsigned i = 0; i != 16; ++i)
9039 Ops[i] = i + Amt;
9040 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9041 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9042}
9043
9044/// Do we have an efficient pattern in a .td file for this node?
9045///
9046/// \param V - pointer to the BuildVectorSDNode being matched
9047/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9048///
9049/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9050/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9051/// the opposite is true (expansion is beneficial) are:
9052/// - The node builds a vector out of integers that are not 32 or 64-bits
9053/// - The node builds a vector out of constants
9054/// - The node is a "load-and-splat"
9055/// In all other cases, we will choose to keep the BUILD_VECTOR.
9056 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9057 bool HasDirectMove,
9058 bool HasP8Vector) {
9059 EVT VecVT = V->getValueType(0);
9060 bool RightType = VecVT == MVT::v2f64 ||
9061 (HasP8Vector && VecVT == MVT::v4f32) ||
9062 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9063 if (!RightType)
9064 return false;
9065
9066 bool IsSplat = true;
9067 bool IsLoad = false;
9068 SDValue Op0 = V->getOperand(0);
9069
9070 // This function is called in a block that confirms the node is not a constant
9071 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9072 // different constants.
9073 if (V->isConstant())
9074 return false;
9075 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9076 if (V->getOperand(i).isUndef())
9077 return false;
9078 // We want to expand nodes that represent load-and-splat even if the
9079 // loaded value is a floating point truncation or conversion to int.
9080 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9081 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9082 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9083 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9084 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9085 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9086 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9087 IsLoad = true;
9088 // If the operands are different or the input is not a load and has more
9089 // uses than just this BV node, then it isn't a splat.
9090 if (V->getOperand(i) != Op0 ||
9091 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9092 IsSplat = false;
9093 }
9094 return !(IsSplat && IsLoad);
9095}
9096
9097// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9098SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9099
9100 SDLoc dl(Op);
9101 SDValue Op0 = Op->getOperand(0);
9102
9103 if ((Op.getValueType() != MVT::f128) ||
9104 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9105 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9106 (Op0.getOperand(1).getValueType() != MVT::i64))
9107 return SDValue();
9108
9109 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9110 Op0.getOperand(1));
9111}
9112
9113static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9114 const SDValue *InputLoad = &Op;
9115 if (InputLoad->getOpcode() == ISD::BITCAST)
9116 InputLoad = &InputLoad->getOperand(0);
9117 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9118 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9119 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9120 InputLoad = &InputLoad->getOperand(0);
9121 }
9122 if (InputLoad->getOpcode() != ISD::LOAD)
9123 return nullptr;
9124 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9125 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9126}
9127
9128// Convert the argument APFloat to a single precision APFloat if there is no
9129// loss in information during the conversion to single precision APFloat and the
9130// resulting number is not a denormal number. Return true if successful.
9131 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9132 APFloat APFloatToConvert = ArgAPFloat;
9133 bool LosesInfo = true;
9134 APFloatToConvert.convert(APFloat::IEEEsingle(),
9135 APFloat::rmNearestTiesToEven, &LosesInfo);
9136 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9137 if (Success)
9138 ArgAPFloat = APFloatToConvert;
9139 return Success;
9140 }
9141
9142// Bitcast the argument APInt to a double and convert it to a single precision
9143// APFloat, bitcast the APFloat to an APInt and assign it to the original
9144// argument if there is no loss in information during the conversion from
9145// double to single precision APFloat and the resulting number is not a denormal
9146// number. Return true if successful.
9147 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9148 double DpValue = ArgAPInt.bitsToDouble();
9149 APFloat APFloatDp(DpValue);
9150 bool Success = convertToNonDenormSingle(APFloatDp);
9151 if (Success)
9152 ArgAPInt = APFloatDp.bitcastToAPInt();
9153 return Success;
9154}
9155
9156// If this is a case we can't handle, return null and let the default
9157// expansion code take care of it. If we CAN select this case, and if it
9158// selects to a single instruction, return Op. Otherwise, if we can codegen
9159// this case more efficiently than a constant pool load, lower it to the
9160// sequence of ops that should be used.
9161SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9162 SelectionDAG &DAG) const {
9163 SDLoc dl(Op);
9164 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9165 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9166
9167 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
9168 // We first build an i32 vector, load it into a QPX register,
9169 // then convert it to a floating-point vector and compare it
9170 // to a zero vector to get the boolean result.
9171 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9172 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
9173 MachinePointerInfo PtrInfo =
9174 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9175 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9176 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9177
9178 assert(BVN->getNumOperands() == 4 &&
9179 "BUILD_VECTOR for v4i1 does not have 4 operands");
9180
9181 bool IsConst = true;
9182 for (unsigned i = 0; i < 4; ++i) {
9183 if (BVN->getOperand(i).isUndef()) continue;
9184 if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
9185 IsConst = false;
9186 break;
9187 }
9188 }
9189
9190 if (IsConst) {
9191 Constant *One =
9192 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
9193 Constant *NegOne =
9194 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
9195
9196 Constant *CV[4];
9197 for (unsigned i = 0; i < 4; ++i) {
9198 if (BVN->getOperand(i).isUndef())
9199 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
9200 else if (isNullConstant(BVN->getOperand(i)))
9201 CV[i] = NegOne;
9202 else
9203 CV[i] = One;
9204 }
9205
9206 Constant *CP = ConstantVector::get(CV);
9207 SDValue CPIdx =
9208 DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
9209
9210 SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
9211 SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
9212 return DAG.getMemIntrinsicNode(
9213 PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
9214 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
9215 }
9216
9218 for (unsigned i = 0; i < 4; ++i) {
9219 if (BVN->getOperand(i).isUndef()) continue;
9220
9221 unsigned Offset = 4*i;
9222 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
9223 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
9224
9225 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
9226 if (StoreSize > 4) {
9227 Stores.push_back(
9228 DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
9229 PtrInfo.getWithOffset(Offset), MVT::i32));
9230 } else {
9231 SDValue StoreValue = BVN->getOperand(i);
9232 if (StoreSize < 4)
9233 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
9234
9235 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
9236 PtrInfo.getWithOffset(Offset)));
9237 }
9238 }
9239
9240 SDValue StoreChain;
9241 if (!Stores.empty())
9242 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9243 else
9244 StoreChain = DAG.getEntryNode();
9245
9246 // Now load from v4i32 into the QPX register; this will extend it to
9247 // v4i64 but not yet convert it to a floating point. Nevertheless, this
9248 // is typed as v4f64 because the QPX register integer states are not
9249 // explicitly represented.
9250
9251 SDValue Ops[] = {StoreChain,
9252 DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
9253 FIdx};
9254 SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});
9255
9256 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
9257 dl, VTs, Ops, MVT::v4i32, PtrInfo);
9258 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
9259 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
9260 LoadedVect);
9261
9262 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);
9263
9264 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
9265 }
9266
9267 // All other QPX vectors are handled by generic code.
9268 if (Subtarget.hasQPX())
9269 return SDValue();
9270
9271 // Check if this is a splat of a constant value.
9272 APInt APSplatBits, APSplatUndef;
9273 unsigned SplatBitSize;
9274 bool HasAnyUndefs;
9275 bool BVNIsConstantSplat =
9276 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9277 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9278
9279 // If it is a splat of a double, check if we can shrink it to a 32 bit
9280 // non-denormal float which when converted back to double gives us the same
9281 // double. This is to exploit the XXSPLTIDP instruction.
9282 if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9283 (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9284 convertToNonDenormSingle(APSplatBits)) {
9285 SDValue SplatNode = DAG.getNode(
9286 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9287 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9288 return DAG.getBitcast(Op.getValueType(), SplatNode);
9289 }
9290
9291 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9292
9293 bool IsPermutedLoad = false;
9294 const SDValue *InputLoad =
9295 getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9296 // Handle load-and-splat patterns as we have instructions that will do this
9297 // in one go.
9298 if (InputLoad && DAG.isSplatValue(Op, true)) {
9299 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9300
9301 // We have handling for 4 and 8 byte elements.
9302 unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9303
9304 // Checking for a single use of this load, we have to check for vector
9305 // width (128 bits) / ElementSize uses (since each operand of the
9306 // BUILD_VECTOR is a separate use of the value).
9307 if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
9308 ((Subtarget.hasVSX() && ElementSize == 64) ||
9309 (Subtarget.hasP9Vector() && ElementSize == 32))) {
9310 SDValue Ops[] = {
9311 LD->getChain(), // Chain
9312 LD->getBasePtr(), // Ptr
9313 DAG.getValueType(Op.getValueType()) // VT
9314 };
9315 return
9316 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
9317 DAG.getVTList(Op.getValueType(), MVT::Other),
9318 Ops, LD->getMemoryVT(), LD->getMemOperand());
9319 }
9320 }
9321
9322 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9323 // lowered to VSX instructions under certain conditions.
9324 // Without VSX, there is no pattern more efficient than expanding the node.
9325 if (Subtarget.hasVSX() &&
9326 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9327 Subtarget.hasP8Vector()))
9328 return Op;
9329 return SDValue();
9330 }
9331
9332 uint64_t SplatBits = APSplatBits.getZExtValue();
9333 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9334 unsigned SplatSize = SplatBitSize / 8;
9335
9336 // First, handle single instruction cases.
9337
9338 // All zeros?
9339 if (SplatBits == 0) {
9340 // Canonicalize all zero vectors to be v4i32.
9341 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9342 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9343 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9344 }
9345 return Op;
9346 }
9347
9348 // We have XXSPLTIW for constant splats four bytes wide.
9349 // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
9350 // replaced with 4-byte splats. We replicate the SplatBits of a 2-byte splat to
9351 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9352 // turned into a 4-byte splat of 0xABABABAB.
9353 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9354 return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
9355 Op.getValueType(), DAG, dl);
9356
9357 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9358 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9359 dl);
9360
9361 // We have XXSPLTIB for constant splats one byte wide.
9362 if (Subtarget.hasP9Vector() && SplatSize == 1)
9363 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9364 dl);
9365
9366 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9367 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9368 (32-SplatBitSize));
9369 if (SextVal >= -16 && SextVal <= 15)
9370 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9371 dl);
9372
9373 // Two instruction sequences.
9374
9375 // If this value is in the range [-32,30] and is even, use:
9376 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9377 // If this value is in the range [17,31] and is odd, use:
9378 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9379 // If this value is in the range [-31,-17] and is odd, use:
9380 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9381 // Note the last two are three-instruction sequences.
9382 if (SextVal >= -32 && SextVal <= 31) {
9383 // To avoid having these optimizations undone by constant folding,
9384 // we convert to a pseudo that will be expanded later into one of
9385 // the above forms.
9386 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9387 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9388 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9389 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9390 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9391 if (VT == Op.getValueType())
9392 return RetVal;
9393 else
9394 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9395 }
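// Illustrative expansions of the pseudo above (the eventual forms the
// comment lists, produced when VADD_SPLAT is expanded later):
//   SextVal = 24  -> vsplti(12) + vsplti(12)     (even, in [-32, 30])
//   SextVal = 27  -> vsplti(11) - vsplti(-16)    (odd, in [17, 31]: 11 - (-16) == 27)
//   SextVal = -27 -> vsplti(-11) + vsplti(-16)   (odd, in [-31, -17])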
9396
9397 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9398 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9399 // for fneg/fabs.
9400 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9401 // Make -1 and vspltisw -1:
9402 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9403
9404 // Make the VSLW intrinsic, computing 0x8000_0000.
9405 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9406 OnesV, DAG, dl);
9407
9408 // xor by OnesV to invert it.
9409 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9410 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9411 }
9412
9413 // Check to see if this is a wide variety of vsplti*, binop self cases.
9414 static const signed char SplatCsts[] = {
9415 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9416 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9417 };
9418
9419 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9420 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9421 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9422 int i = SplatCsts[idx];
9423
9424 // Figure out what shift amount will be used by altivec if shifted by i in
9425 // this splat size.
9426 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9427
9428 // vsplti + shl self.
9429 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9430 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9431 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9432 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9433 Intrinsic::ppc_altivec_vslw
9434 };
9435 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9436 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9437 }
9438
9439 // vsplti + srl self.
9440 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9441 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9442 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9443 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9444 Intrinsic::ppc_altivec_vsrw
9445 };
9446 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9447 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9448 }
9449
9450 // vsplti + sra self.
9451 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9452 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9453 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9454 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9455 Intrinsic::ppc_altivec_vsraw
9456 };
9457 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9458 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9459 }
9460
9461 // vsplti + rol self.
9462 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9463 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9464 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9465 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9466 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9467 Intrinsic::ppc_altivec_vrlw
9468 };
9469 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9470 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9471 }
9472
9473 // t = vsplti c, result = vsldoi t, t, 1
9474 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9475 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9476 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9477 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9478 }
9479 // t = vsplti c, result = vsldoi t, t, 2
9480 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9481 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9482 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9483 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9484 }
9485 // t = vsplti c, result = vsldoi t, t, 3
9486 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9487 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9488 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9489 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9490 }
9491 }
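// Illustrative example of the "binop self" cases above: a v16i8 splat of 64
// is out of reach of the earlier single- and two-instruction paths, but
// i = 4 satisfies the shl check since 4 << (4 & 7) == 64, so it becomes
// t = vspltisb(4); vslb(t, t), each byte of t supplying both the value and
// the per-element shift amount.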
9492
9493 return SDValue();
9494}
9495
9496/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9497/// the specified operations to build the shuffle.
9498 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9499 SDValue RHS, SelectionDAG &DAG,
9500 const SDLoc &dl) {
9501 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9502 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9503 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9504
9505 enum {
9506 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9507 OP_VMRGHW,
9508 OP_VMRGLW,
9509 OP_VSPLTISW0,
9510 OP_VSPLTISW1,
9511 OP_VSPLTISW2,
9512 OP_VSPLTISW3,
9513 OP_VSLDOI4,
9514 OP_VSLDOI8,
9515 OP_VSLDOI12
9516 };
9517
9518 if (OpNum == OP_COPY) {
9519 if (LHSID == (1*9+2)*9+3) return LHS;
9520 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9521 return RHS;
9522 }
9523
9524 SDValue OpLHS, OpRHS;
9525 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9526 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9527
9528 int ShufIdxs[16];
9529 switch (OpNum) {
9530 default: llvm_unreachable("Unknown i32 permute!");
9531 case OP_VMRGHW:
9532 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9533 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9534 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9535 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9536 break;
9537 case OP_VMRGLW:
9538 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9539 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9540 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9541 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9542 break;
9543 case OP_VSPLTISW0:
9544 for (unsigned i = 0; i != 16; ++i)
9545 ShufIdxs[i] = (i&3)+0;
9546 break;
9547 case OP_VSPLTISW1:
9548 for (unsigned i = 0; i != 16; ++i)
9549 ShufIdxs[i] = (i&3)+4;
9550 break;
9551 case OP_VSPLTISW2:
9552 for (unsigned i = 0; i != 16; ++i)
9553 ShufIdxs[i] = (i&3)+8;
9554 break;
9555 case OP_VSPLTISW3:
9556 for (unsigned i = 0; i != 16; ++i)
9557 ShufIdxs[i] = (i&3)+12;
9558 break;
9559 case OP_VSLDOI4:
9560 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9561 case OP_VSLDOI8:
9562 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9563 case OP_VSLDOI12:
9564 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9565 }
9566 EVT VT = OpLHS.getValueType();
9567 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9568 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9569 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9570 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9571}
9572
9573/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9574/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9575/// SDValue.
9576SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9577 SelectionDAG &DAG) const {
9578 const unsigned BytesInVector = 16;
9579 bool IsLE = Subtarget.isLittleEndian();
9580 SDLoc dl(N);
9581 SDValue V1 = N->getOperand(0);
9582 SDValue V2 = N->getOperand(1);
9583 unsigned ShiftElts = 0, InsertAtByte = 0;
9584 bool Swap = false;
9585
9586 // Shifts required to get the byte we want at element 7.
9587 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9588 0, 15, 14, 13, 12, 11, 10, 9};
9589 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9590 1, 2, 3, 4, 5, 6, 7, 8};
9591
9592 ArrayRef<int> Mask = N->getMask();
9593 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9594
9595 // For each mask element, find out if we're just inserting something
9596 // from V2 into V1 or vice versa.
9597 // Possible permutations inserting an element from V2 into V1:
9598 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9599 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9600 // ...
9601 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9602 // Inserting from V1 into V2 will be similar, except mask range will be
9603 // [16,31].
9604
9605 bool FoundCandidate = false;
9606 // If both vector operands for the shuffle are the same vector, the mask
9607 // will contain only elements from the first one and the second one will be
9608 // undef.
9609 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9610 // Go through the mask of bytes to find an element that's being moved
9611 // from one vector to the other.
9612 for (unsigned i = 0; i < BytesInVector; ++i) {
9613 unsigned CurrentElement = Mask[i];
9614 // If 2nd operand is undefined, we should only look for element 7 in the
9615 // Mask.
9616 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9617 continue;
9618
9619 bool OtherElementsInOrder = true;
9620 // Examine the other elements in the Mask to see if they're in original
9621 // order.
9622 for (unsigned j = 0; j < BytesInVector; ++j) {
9623 if (j == i)
9624 continue;
9625 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9626 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9627 // in which case we assume we're always picking from the 1st operand.
9628 int MaskOffset =
9629 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9630 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9631 OtherElementsInOrder = false;
9632 break;
9633 }
9634 }
9635 // If other elements are in original order, we record the number of shifts
9636 // we need to get the element we want into element 7. Also record which byte
9637 // in the vector we should insert into.
9638 if (OtherElementsInOrder) {
9639 // If 2nd operand is undefined, we assume no shifts and no swapping.
9640 if (V2.isUndef()) {
9641 ShiftElts = 0;
9642 Swap = false;
9643 } else {
9644 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9645 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9646 : BigEndianShifts[CurrentElement & 0xF];
9647 Swap = CurrentElement < BytesInVector;
9648 }
9649 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9650 FoundCandidate = true;
9651 break;
9652 }
9653 }
9654
9655 if (!FoundCandidate)
9656 return SDValue();
9657
9658 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9659 // optionally with VECSHL if shift is required.
9660 if (Swap)
9661 std::swap(V1, V2);
9662 if (V2.isUndef())
9663 V2 = V1;
9664 if (ShiftElts) {
9665 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9666 DAG.getConstant(ShiftElts, dl, MVT::i32));
9667 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9668 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9669 }
9670 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9672}
9673
9674/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9675/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9676/// SDValue.
9677SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9678 SelectionDAG &DAG) const {
9679 const unsigned NumHalfWords = 8;
9680 const unsigned BytesInVector = NumHalfWords * 2;
9681 // Check that the shuffle is on half-words.
9682 if (!isNByteElemShuffleMask(N, 2, 1))
9683 return SDValue();
9684
9685 bool IsLE = Subtarget.isLittleEndian();
9686 SDLoc dl(N);
9687 SDValue V1 = N->getOperand(0);
9688 SDValue V2 = N->getOperand(1);
9689 unsigned ShiftElts = 0, InsertAtByte = 0;
9690 bool Swap = false;
9691
9692 // Shifts required to get the half-word we want at element 3.
9693 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9694 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9695
9696 uint32_t Mask = 0;
9697 uint32_t OriginalOrderLow = 0x1234567;
9698 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9699 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9700 // 32-bit space, only need 4-bit nibbles per element.
9701 for (unsigned i = 0; i < NumHalfWords; ++i) {
9702 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9703 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9704 }
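// Illustrative example of the packing above: for the identity half-word
// shuffle, the byte mask elements 0,2,4,...,14 give getMaskElt(i * 2) / 2 == i
// for each i, so Mask packs to 0x01234567, which equals OriginalOrderLow.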
9705
9706 // For each mask element, find out if we're just inserting something
9707 // from V2 into V1 or vice versa. Possible permutations inserting an element
9708 // from V2 into V1:
9709 // X, 1, 2, 3, 4, 5, 6, 7
9710 // 0, X, 2, 3, 4, 5, 6, 7
9711 // 0, 1, X, 3, 4, 5, 6, 7
9712 // 0, 1, 2, X, 4, 5, 6, 7
9713 // 0, 1, 2, 3, X, 5, 6, 7
9714 // 0, 1, 2, 3, 4, X, 6, 7
9715 // 0, 1, 2, 3, 4, 5, X, 7
9716 // 0, 1, 2, 3, 4, 5, 6, X
9717 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9718
9719 bool FoundCandidate = false;
9720 // Go through the mask of half-words to find an element that's being moved
9721 // from one vector to the other.
9722 for (unsigned i = 0; i < NumHalfWords; ++i) {
9723 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9724 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9725 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9726 uint32_t TargetOrder = 0x0;
9727
9728 // If both vector operands for the shuffle are the same vector, the mask
9729 // will contain only elements from the first one and the second one will be
9730 // undef.
9731 if (V2.isUndef()) {
9732 ShiftElts = 0;
9733 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9734 TargetOrder = OriginalOrderLow;
9735 Swap = false;
9736 // Skip if not the correct element or mask of other elements don't equal
9737 // to our expected order.
9738 if (MaskOneElt == VINSERTHSrcElem &&
9739 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9740 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9741 FoundCandidate = true;
9742 break;
9743 }
9744 } else { // If both operands are defined.
9745 // Target order is [8,15] if the current mask is between [0,7].
9746 TargetOrder =
9747 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9748 // Skip if mask of other elements don't equal our expected order.
9749 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9750 // We only need the last 3 bits for the number of shifts.
9751 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9752 : BigEndianShifts[MaskOneElt & 0x7];
9753 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9754 Swap = MaskOneElt < NumHalfWords;
9755 FoundCandidate = true;
9756 break;
9757 }
9758 }
9759 }
9760
9761 if (!FoundCandidate)
9762 return SDValue();
9763
9764 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9765 // optionally with VECSHL if shift is required.
9766 if (Swap)
9767 std::swap(V1, V2);
9768 if (V2.isUndef())
9769 V2 = V1;
9770 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9771 if (ShiftElts) {
9772 // Double ShiftElts because we're left shifting on v16i8 type.
9773 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9774 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9775 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9776 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9777 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9778 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9779 }
9780 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9781 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9782 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9783 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9784}
9785
9786/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9787/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9788/// return the default SDValue.
9789SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9790 SelectionDAG &DAG) const {
9791 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9792 // to v16i8. Peek through the bitcasts to get the actual operands.
9793 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9794 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9795
9796 auto ShuffleMask = SVN->getMask();
9797 SDValue VecShuffle(SVN, 0);
9798 SDLoc DL(SVN);
9799
9800 // Check that we have a four byte shuffle.
9801 if (!isNByteElemShuffleMask(SVN, 4, 1))
9802 return SDValue();
9803
9804 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9805 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9806 std::swap(LHS, RHS);
9807 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9808 ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9809 }
9810
9811 // Ensure that the RHS is a vector of constants.
9812 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9813 if (!BVN)
9814 return SDValue();
9815
9816 // Check if RHS is a splat of 4-bytes (or smaller).
9817 APInt APSplatValue, APSplatUndef;
9818 unsigned SplatBitSize;
9819 bool HasAnyUndefs;
9820 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9821 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9822 SplatBitSize > 32)
9823 return SDValue();
9824
9825 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9826 // The instruction splats a constant C into two words of the source vector
9827 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9828 // Thus we check that the shuffle mask is the equivalent of
9829 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9830 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9831 // within each word are consecutive, so we only need to check the first byte.
9832 SDValue Index;
9833 bool IsLE = Subtarget.isLittleEndian();
9834 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9835 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9836 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9837 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9838 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9839 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9840 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9841 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9842 else
9843 return SDValue();
9844
9845 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9846 // for XXSPLTI32DX.
9847 unsigned SplatVal = APSplatValue.getZExtValue();
9848 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9849 SplatVal |= (SplatVal << SplatBitSize);
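// Illustrative example: an 8-bit splat value of 0xAB widens to 0xABAB after
// the first iteration and 0xABABABAB after the second, yielding the 32-bit
// immediate that XXSPLTI32DX splats into the selected words.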
9850
9851 SDValue SplatNode = DAG.getNode(
9852 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9853 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9854 return DAG.getBitcast(MVT::v16i8, SplatNode);
9855}
9856
9857/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9858/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9859/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9860 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
9861SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9862 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9863 assert(Op.getValueType() == MVT::v1i128 &&
9864 "Only set v1i128 as custom, other type shouldn't reach here!");
9865 SDLoc dl(Op);
9866 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9867 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9868 unsigned SHLAmt = N1.getConstantOperandVal(0);
9869 if (SHLAmt % 8 == 0) {
9870 std::array<int, 16> Mask;
9871 std::iota(Mask.begin(), Mask.end(), 0);
9872 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9873 if (SDValue Shuffle =
9874 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9875 DAG.getUNDEF(MVT::v16i8), Mask))
9876 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9877 }
9878 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9879 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9880 DAG.getConstant(SHLAmt, dl, MVT::i32));
9881 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9882 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9883 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9884 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9885}
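// Illustrative example: a v1i128 rotate by 16 bits (SHLAmt % 8 == 0) turns
// into the byte shuffle <2,3,...,15,0,1> built above, while a rotate by 12
// bits falls through to the scalar path and computes
// (x << 12) | (x >> 116) on i128.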
9886
9887/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9888/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9889/// return the code it can be lowered into. Worst case, it can always be
9890/// lowered into a vperm.
9891SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9892 SelectionDAG &DAG) const {
9893 SDLoc dl(Op);
9894 SDValue V1 = Op.getOperand(0);
9895 SDValue V2 = Op.getOperand(1);
9896 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9897
9898 // Any nodes that were combined in the target-independent combiner prior
9899 // to vector legalization will not be sent to the target combine. Try to
9900 // combine it here.
9901 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9902 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9903 return NewShuffle;
9904 Op = NewShuffle;
9905 SVOp = cast<ShuffleVectorSDNode>(Op);
9906 V1 = Op.getOperand(0);
9907 V2 = Op.getOperand(1);
9908 }
9909 EVT VT = Op.getValueType();
9910 bool isLittleEndian = Subtarget.isLittleEndian();
9911
9912 unsigned ShiftElts, InsertAtByte;
9913 bool Swap = false;
9914
9915 // If this is a load-and-splat, we can do that with a single instruction
9916 // in some cases. However if the load has multiple uses, we don't want to
9917 // combine it because that will just produce multiple loads.
9918 bool IsPermutedLoad = false;
9919 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9920 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9921 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9922 InputLoad->hasOneUse()) {
9923 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9924 int SplatIdx =
9925 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9926
9927 // The splat index for permuted loads will be in the left half of the vector
9928 // which is strictly wider than the loaded value by 8 bytes. So we need to
9929 // adjust the splat index to point to the correct address in memory.
9930 if (IsPermutedLoad) {
9931 assert(isLittleEndian && "Unexpected permuted load on big endian target");
9932 SplatIdx += IsFourByte ? 2 : 1;
9933 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9934 "Splat of a value outside of the loaded memory");
9935 }
9936
9937 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9938 // For 4-byte load-and-splat, we need Power9.
9939 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9940 uint64_t Offset = 0;
9941 if (IsFourByte)
9942 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9943 else
9944 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9945
9946 SDValue BasePtr = LD->getBasePtr();
9947 if (Offset != 0)
9948 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9949 BasePtr, DAG.getIntPtrConstant(Offset, dl));
9950 SDValue Ops[] = {
9951 LD->getChain(), // Chain
9952 BasePtr, // BasePtr
9953 DAG.getValueType(Op.getValueType()) // VT
9954 };
9955 SDVTList VTL =
9956 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9957 SDValue LdSplt =
9958 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9959 Ops, LD->getMemoryVT(), LD->getMemOperand());
9960 if (LdSplt.getValueType() != SVOp->getValueType(0))
9961 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9962 return LdSplt;
9963 }
9964 }
9965 if (Subtarget.hasP9Vector() &&
9966 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9967 isLittleEndian)) {
9968 if (Swap)
9969 std::swap(V1, V2);
9970 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9971 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9972 if (ShiftElts) {
9973 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9974 DAG.getConstant(ShiftElts, dl, MVT::i32));
9975 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9976 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9977 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9978 }
9979 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9980 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9981 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9982 }
9983
9984 if (Subtarget.hasPrefixInstrs()) {
9985 SDValue SplatInsertNode;
9986 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9987 return SplatInsertNode;
9988 }
9989
9990 if (Subtarget.hasP9Altivec()) {
9991 SDValue NewISDNode;
9992 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9993 return NewISDNode;
9994
9995 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9996 return NewISDNode;
9997 }
9998
9999 if (Subtarget.hasVSX() &&
10000 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10001 if (Swap)
10002 std::swap(V1, V2);
10003 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10004 SDValue Conv2 =
10005 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10006
10007 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10008 DAG.getConstant(ShiftElts, dl, MVT::i32));
10009 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10010 }
10011
10012 if (Subtarget.hasVSX() &&
10013 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10014 if (Swap)
10015 std::swap(V1, V2);
10016 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10017 SDValue Conv2 =
10018 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10019
10020 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10021 DAG.getConstant(ShiftElts, dl, MVT::i32));
10022 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10023 }
10024
10025 if (Subtarget.hasP9Vector()) {
10026 if (PPC::isXXBRHShuffleMask(SVOp)) {
10027 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10028 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10029 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10030 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10031 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10032 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10033 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10034 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10035 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10036 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10037 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10038 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10039 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10040 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10041 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10042 }
10043 }
10044
10045 if (Subtarget.hasVSX()) {
10046 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10047 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10048
10049 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10050 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10051 DAG.getConstant(SplatIdx, dl, MVT::i32));
10052 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10053 }
10054
10055 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10056 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10057 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10058 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10059 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10060 }
10061 }
10062
10063 if (Subtarget.hasQPX()) {
10064 if (VT.getVectorNumElements() != 4)
10065 return SDValue();
10066
10067 if (V2.isUndef()) V2 = V1;
10068
10069 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
10070 if (AlignIdx != -1) {
10071 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
10072 DAG.getConstant(AlignIdx, dl, MVT::i32));
10073 } else if (SVOp->isSplat()) {
10074 int SplatIdx = SVOp->getSplatIndex();
10075 if (SplatIdx >= 4) {
10076 std::swap(V1, V2);
10077 SplatIdx -= 4;
10078 }
10079
10080 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
10081 DAG.getConstant(SplatIdx, dl, MVT::i32));
10082 }
10083
10084 // Lower this into a qvgpci/qvfperm pair.
10085
10086 // Compute the qvgpci literal
10087 unsigned idx = 0;
10088 for (unsigned i = 0; i < 4; ++i) {
10089 int m = SVOp->getMaskElt(i);
10090 unsigned mm = m >= 0 ? (unsigned) m : i;
10091 idx |= mm << (3-i)*3;
10092 }
10093
10094 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
10095 DAG.getConstant(idx, dl, MVT::i32));
10096 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
10097 }
10098
10099 // Cases that are handled by instructions that take permute immediates
10100 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10101 // selected by the instruction selector.
10102 if (V2.isUndef()) {
10103 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10104 PPC::isSplatShuffleMask(SVOp, 2) ||
10105 PPC::isSplatShuffleMask(SVOp, 4) ||
10106 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10107 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10108 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10109 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10110 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10111 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10112 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10113 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10114 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10115 (Subtarget.hasP8Altivec() && (
10116 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10117 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10118 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10119 return Op;
10120 }
10121 }
10122
10123 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10124 // and produce a fixed permutation. If any of these match, do not lower to
10125 // VPERM.
10126 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10127 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10128 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10129 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10130 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10131 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10132 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10133 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10134 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10135 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10136 (Subtarget.hasP8Altivec() && (
10137 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10138 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10139 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10140 return Op;
10141
10142 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10143 // perfect shuffle table to emit an optimal matching sequence.
10144 ArrayRef<int> PermMask = SVOp->getMask();
10145
10146 unsigned PFIndexes[4];
10147 bool isFourElementShuffle = true;
10148 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10149 unsigned EltNo = 8; // Start out undef.
10150 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10151 if (PermMask[i*4+j] < 0)
10152 continue; // Undef, ignore it.
10153
10154 unsigned ByteSource = PermMask[i*4+j];
10155 if ((ByteSource & 3) != j) {
10156 isFourElementShuffle = false;
10157 break;
10158 }
10159
10160 if (EltNo == 8) {
10161 EltNo = ByteSource/4;
10162 } else if (EltNo != ByteSource/4) {
10163 isFourElementShuffle = false;
10164 break;
10165 }
10166 }
10167 PFIndexes[i] = EltNo;
10168 }
10169
10170 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10171 // perfect shuffle vector to determine if it is cost effective to do this as
10172 // discrete instructions, or whether we should use a vperm.
10173 // For now, we skip this for little endian until such time as we have a
10174 // little-endian perfect shuffle table.
10175 if (isFourElementShuffle && !isLittleEndian) {
10176 // Compute the index in the perfect shuffle table.
10177 unsigned PFTableIndex =
10178 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
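// Illustrative example: each PFIndexes[i] is in [0, 8] (8 meaning undef), so
// the four indexes form a base-9 number; the identity shuffle <0,1,2,3>
// yields ((0*9 + 1)*9 + 2)*9 + 3 == 102, the same constant
// GeneratePerfectShuffle compares against for its OP_COPY "return LHS" case.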
10179
10180 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10181 unsigned Cost = (PFEntry >> 30);
10182
10183 // Determining when to avoid vperm is tricky. Many things affect the cost
10184 // of vperm, particularly how many times the perm mask needs to be computed.
10185 // For example, if the perm mask can be hoisted out of a loop or is already
10186 // used (perhaps because there are multiple permutes with the same shuffle
10187 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
10188 // the loop requires an extra register.
10189 //
10190 // As a compromise, we only emit discrete instructions if the shuffle can be
10191 // generated in 3 or fewer operations. When we have loop information
10192 // available, if this block is within a loop, we should avoid using vperm
10193 // for 3-operation perms and use a constant pool load instead.
10194 if (Cost < 3)
10195 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10196 }
10197
10198 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10199 // vector that will get spilled to the constant pool.
10200 if (V2.isUndef()) V2 = V1;
10201
10202 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10203 // that it is in input element units, not in bytes. Convert now.
10204
10205 // For little endian, the order of the input vectors is reversed, and
10206 // the permutation mask is complemented with respect to 31. This is
10207 // necessary to produce proper semantics with the big-endian-biased vperm
10208 // instruction.
10209 EVT EltVT = V1.getValueType().getVectorElementType();
10210 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10211
10212 SmallVector<SDValue, 16> ResultMask;
10213 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10214 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10215
10216 for (unsigned j = 0; j != BytesPerElement; ++j)
10217 if (isLittleEndian)
10218 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10219 dl, MVT::i32));
10220 else
10221 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10222 MVT::i32));
10223 }
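// Illustrative example: a v4i32 shuffle selecting element 5 (element 1 of
// V2) contributes the big-endian control bytes 20,21,22,23; on little-endian
// targets the operands are swapped and each byte complemented to 31 - b,
// giving 11,10,9,8 for the same element.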
10224
10225 ShufflesHandledWithVPERM++;
10226 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10227 LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10228 LLVM_DEBUG(SVOp->dump());
10229 LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10230 LLVM_DEBUG(VPermMask.dump());
10231
10232 if (isLittleEndian)
10233 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10234 V2, V1, VPermMask);
10235 else
10236 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10237 V1, V2, VPermMask);
10238}
10239
10240/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10241/// vector comparison. If it is, return true and fill in Opc/isDot with
10242/// information about the intrinsic.
10243 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10244 bool &isDot, const PPCSubtarget &Subtarget) {
10245 unsigned IntrinsicID =
10246 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10247 CompareOpc = -1;
10248 isDot = false;
10249 switch (IntrinsicID) {
10250 default:
10251 return false;
10252 // Comparison predicates.
10253 case Intrinsic::ppc_altivec_vcmpbfp_p:
10254 CompareOpc = 966;
10255 isDot = true;
10256 break;
10257 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10258 CompareOpc = 198;
10259 isDot = true;
10260 break;
10261 case Intrinsic::ppc_altivec_vcmpequb_p:
10262 CompareOpc = 6;
10263 isDot = true;
10264 break;
10265 case Intrinsic::ppc_altivec_vcmpequh_p:
10266 CompareOpc = 70;
10267 isDot = true;
10268 break;
10269 case Intrinsic::ppc_altivec_vcmpequw_p:
10270 CompareOpc = 134;
10271 isDot = true;
10272 break;
10273 case Intrinsic::ppc_altivec_vcmpequd_p:
10274 if (Subtarget.hasP8Altivec()) {
10275 CompareOpc = 199;
10276 isDot = true;
10277 } else
10278 return false;
10279 break;
10280 case Intrinsic::ppc_altivec_vcmpneb_p:
10281 case Intrinsic::ppc_altivec_vcmpneh_p:
10282 case Intrinsic::ppc_altivec_vcmpnew_p:
10283 case Intrinsic::ppc_altivec_vcmpnezb_p:
10284 case Intrinsic::ppc_altivec_vcmpnezh_p:
10285 case Intrinsic::ppc_altivec_vcmpnezw_p:
10286 if (Subtarget.hasP9Altivec()) {
10287 switch (IntrinsicID) {
10288 default:
10289 llvm_unreachable("Unknown comparison intrinsic.");
10290 case Intrinsic::ppc_altivec_vcmpneb_p:
10291 CompareOpc = 7;
10292 break;
10293 case Intrinsic::ppc_altivec_vcmpneh_p:
10294 CompareOpc = 71;
10295 break;
10296 case Intrinsic::ppc_altivec_vcmpnew_p:
10297 CompareOpc = 135;
10298 break;
10299 case Intrinsic::ppc_altivec_vcmpnezb_p:
10300 CompareOpc = 263;
10301 break;
10302 case Intrinsic::ppc_altivec_vcmpnezh_p:
10303 CompareOpc = 327;
10304 break;
10305 case Intrinsic::ppc_altivec_vcmpnezw_p:
10306 CompareOpc = 391;
10307 break;
10308 }
10309 isDot = true;
10310 } else
10311 return false;
10312 break;
10313 case Intrinsic::ppc_altivec_vcmpgefp_p:
10314 CompareOpc = 454;
10315 isDot = true;
10316 break;
10317 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10318 CompareOpc = 710;
10319 isDot = true;
10320 break;
10321 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10322 CompareOpc = 774;
10323 isDot = true;
10324 break;
10325 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10326 CompareOpc = 838;
10327 isDot = true;
10328 break;
10329 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10330 CompareOpc = 902;
10331 isDot = true;
10332 break;
10333 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10334 if (Subtarget.hasP8Altivec()) {
10335 CompareOpc = 967;
10336 isDot = true;
10337 } else
10338 return false;
10339 break;
10340 case Intrinsic::ppc_altivec_vcmpgtub_p:
10341 CompareOpc = 518;
10342 isDot = true;
10343 break;
10344 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10345 CompareOpc = 582;
10346 isDot = true;
10347 break;
10348 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10349 CompareOpc = 646;
10350 isDot = true;
10351 break;
10352 case Intrinsic::ppc_altivec_vcmpgtud_p:
10353 if (Subtarget.hasP8Altivec()) {
10354 CompareOpc = 711;
10355 isDot = true;
10356 } else
10357 return false;
10358 break;
10359
10360 // VSX predicate comparisons use the same infrastructure
10361 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10362 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10363 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10364 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10365 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10366 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10367 if (Subtarget.hasVSX()) {
10368 switch (IntrinsicID) {
10369 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10370 CompareOpc = 99;
10371 break;
10372 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10373 CompareOpc = 115;
10374 break;
10375 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10376 CompareOpc = 107;
10377 break;
10378 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10379 CompareOpc = 67;
10380 break;
10381 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10382 CompareOpc = 83;
10383 break;
10384 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10385 CompareOpc = 75;
10386 break;
10387 }
10388 isDot = true;
10389 } else
10390 return false;
10391 break;
10392
10393 // Normal Comparisons.
10394 case Intrinsic::ppc_altivec_vcmpbfp:
10395 CompareOpc = 966;
10396 break;
10397 case Intrinsic::ppc_altivec_vcmpeqfp:
10398 CompareOpc = 198;
10399 break;
10400 case Intrinsic::ppc_altivec_vcmpequb:
10401 CompareOpc = 6;
10402 break;
10403 case Intrinsic::ppc_altivec_vcmpequh:
10404 CompareOpc = 70;
10405 break;
10406 case Intrinsic::ppc_altivec_vcmpequw:
10407 CompareOpc = 134;
10408 break;
10409 case Intrinsic::ppc_altivec_vcmpequd:
10410 if (Subtarget.hasP8Altivec())
10411 CompareOpc = 199;
10412 else
10413 return false;
10414 break;
10415 case Intrinsic::ppc_altivec_vcmpneb:
10416 case Intrinsic::ppc_altivec_vcmpneh:
10417 case Intrinsic::ppc_altivec_vcmpnew:
10418 case Intrinsic::ppc_altivec_vcmpnezb:
10419 case Intrinsic::ppc_altivec_vcmpnezh:
10420 case Intrinsic::ppc_altivec_vcmpnezw:
10421 if (Subtarget.hasP9Altivec())
10422 switch (IntrinsicID) {
10423 default:
10424 llvm_unreachable("Unknown comparison intrinsic.");
10425 case Intrinsic::ppc_altivec_vcmpneb:
10426 CompareOpc = 7;
10427 break;
10428 case Intrinsic::ppc_altivec_vcmpneh:
10429 CompareOpc = 71;
10430 break;
10431 case Intrinsic::ppc_altivec_vcmpnew:
10432 CompareOpc = 135;
10433 break;
10434 case Intrinsic::ppc_altivec_vcmpnezb:
10435 CompareOpc = 263;
10436 break;
10437 case Intrinsic::ppc_altivec_vcmpnezh:
10438 CompareOpc = 327;
10439 break;
10440 case Intrinsic::ppc_altivec_vcmpnezw:
10441 CompareOpc = 391;
10442 break;
10443 }
10444 else
10445 return false;
10446 break;
10447 case Intrinsic::ppc_altivec_vcmpgefp:
10448 CompareOpc = 454;
10449 break;
10450 case Intrinsic::ppc_altivec_vcmpgtfp:
10451 CompareOpc = 710;
10452 break;
10453 case Intrinsic::ppc_altivec_vcmpgtsb:
10454 CompareOpc = 774;
10455 break;
10456 case Intrinsic::ppc_altivec_vcmpgtsh:
10457 CompareOpc = 838;
10458 break;
10459 case Intrinsic::ppc_altivec_vcmpgtsw:
10460 CompareOpc = 902;
10461 break;
10462 case Intrinsic::ppc_altivec_vcmpgtsd:
10463 if (Subtarget.hasP8Altivec())
10464 CompareOpc = 967;
10465 else
10466 return false;
10467 break;
10468 case Intrinsic::ppc_altivec_vcmpgtub:
10469 CompareOpc = 518;
10470 break;
10471 case Intrinsic::ppc_altivec_vcmpgtuh:
10472 CompareOpc = 582;
10473 break;
10474 case Intrinsic::ppc_altivec_vcmpgtuw:
10475 CompareOpc = 646;
10476 break;
10477 case Intrinsic::ppc_altivec_vcmpgtud:
10478 if (Subtarget.hasP8Altivec())
10479 CompareOpc = 711;
10480 else
10481 return false;
10482 break;
10483 }
10484 return true;
10485}
10486
10487/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10488/// lower, do it, otherwise return null.
10489SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10490 SelectionDAG &DAG) const {
10491 unsigned IntrinsicID =
10492 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10493
10494 SDLoc dl(Op);
10495
10496 if (IntrinsicID == Intrinsic::thread_pointer) {
10497 // Reads the thread pointer register, used for __builtin_thread_pointer.
10498 if (Subtarget.isPPC64())
10499 return DAG.getRegister(PPC::X13, MVT::i64);
10500 return DAG.getRegister(PPC::R2, MVT::i32);
10501 }
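10501a // As a usage sketch, a call such as
10501b //   void *tp = __builtin_thread_pointer();
10501c // therefore lowers to a plain register read: X13 on 64-bit targets and
10501d // R2 on 32-bit targets.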
10502
10503 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10504 // opcode number of the comparison.
10505 int CompareOpc;
10506 bool isDot;
10507 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10508 return SDValue(); // Don't custom lower most intrinsics.
10509
10510 // If this is a non-dot comparison, make the VCMP node and we are done.
10511 if (!isDot) {
10512 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10513 Op.getOperand(1), Op.getOperand(2),
10514 DAG.getConstant(CompareOpc, dl, MVT::i32));
10515 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10516 }
10517
10518 // Create the PPCISD altivec 'dot' comparison node.
10519 SDValue Ops[] = {
10520 Op.getOperand(2), // LHS
10521 Op.getOperand(3), // RHS
10522 DAG.getConstant(CompareOpc, dl, MVT::i32)
10523 };
10524 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10525 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
10526
10527 // Now that we have the comparison, emit a copy from the CR to a GPR.
10528 // This is flagged to the above dot comparison.
10529 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10530 DAG.getRegister(PPC::CR6, MVT::i32),
10531 CompNode.getValue(1));
10532
10533 // Unpack the result based on how the target uses it.
10534 unsigned BitNo; // Bit # of CR6.
10535 bool InvertBit; // Invert result?
10536 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10537 default: // Can't happen, don't crash on invalid number though.
10538 case 0: // Return the value of the EQ bit of CR6.
10539 BitNo = 0; InvertBit = false;
10540 break;
10541 case 1: // Return the inverted value of the EQ bit of CR6.
10542 BitNo = 0; InvertBit = true;
10543 break;
10544 case 2: // Return the value of the LT bit of CR6.
10545 BitNo = 2; InvertBit = false;
10546 break;
10547 case 3: // Return the inverted value of the LT bit of CR6.
10548 BitNo = 2; InvertBit = true;
10549 break;
10550 }
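// A worked example of the extraction below: MFOCRF leaves CR6's LT, GT, EQ
// and UN bits at bit positions 7, 6, 5 and 4 of the GPR, so for the EQ bit
// (BitNo = 0) the shift amount is 8 - (3 - 0) = 5 and for the LT bit
// (BitNo = 2) it is 8 - (3 - 2) = 7.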
10551
10552 // Shift the bit into the low position.
10553 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10554 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10555 // Isolate the bit.
10556 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10557 DAG.getConstant(1, dl, MVT::i32));
10558
10559 // If we are supposed to, toggle the bit.
10560 if (InvertBit)
10561 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10562 DAG.getConstant(1, dl, MVT::i32));
10563 return Flags;
10564}
10565
10566SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10567 SelectionDAG &DAG) const {
10568 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10569 // the beginning of the argument list.
10570 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10571 SDLoc DL(Op);
10572 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10573 case Intrinsic::ppc_cfence: {
10574 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10575 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10576 return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10577 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10578 Op.getOperand(ArgStart + 1)),
10579 Op.getOperand(0)),
10580 0);
10581 }
10582 default:
10583 break;
10584 }
10585 return SDValue();
10586}
10587
10588// Lower scalar BSWAP64 to xxbrd.
10589SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10590 SDLoc dl(Op);
10591 // MTVSRDD
10592 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10593 Op.getOperand(0));
10594 // XXBRD
10595 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10596 // MFVSRD
10597 int VectorIndex = 0;
10598 if (Subtarget.isLittleEndian())
10599 VectorIndex = 1;
10600 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10601 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10602 return Op;
10603}
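// Taken together, for a GPR input this is roughly the three-instruction
// sequence named in the comments above:
//   mtvsrdd vsN, rX, rX ; xxbrd vsN, vsN ; mfvsrd rY, vsN
// with the element index selecting the correct doubleword on LE vs. BE.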
10604
10605// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10606// compared to a value that is atomically loaded (atomic loads zero-extend).
10607SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10608 SelectionDAG &DAG) const {
10609 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10610 "Expecting an atomic compare-and-swap here.");
10611 SDLoc dl(Op);
10612 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10613 EVT MemVT = AtomicNode->getMemoryVT();
10614 if (MemVT.getSizeInBits() >= 32)
10615 return Op;
10616
10617 SDValue CmpOp = Op.getOperand(2);
10618 // If this is already correctly zero-extended, leave it alone.
10619 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10620 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10621 return Op;
10622
10623 // Clear the high bits of the compare operand.
10624 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10625 SDValue NewCmpOp =
10626 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10627 DAG.getConstant(MaskVal, dl, MVT::i32));
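// For example, for an i8 compare-and-swap MaskVal is (1 << 8) - 1 = 0xFF, so
// the expected value participates in the comparison as CmpOp & 0xFF, matching
// the zero-extended byte produced by the atomic load.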
10628
10629 // Replace the existing compare operand with the properly zero-extended one.
10630 SmallVector<SDValue, 4> Ops;
10631 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10632 Ops.push_back(AtomicNode->getOperand(i));
10633 Ops[2] = NewCmpOp;
10634 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10635 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10636 auto NodeTy =
10637 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10638 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10639}
10640
10641SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10642 SelectionDAG &DAG) const {
10643 SDLoc dl(Op);
10644 // Create a stack slot that is 16-byte aligned.
10645 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10646 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10647 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10648 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10649
10650 // Store the input value into Value#0 of the stack slot.
10651 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10652 MachinePointerInfo());
10653 // Load it out.
10654 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10655}
10656
10657SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10658 SelectionDAG &DAG) const {
10659 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10660 "Should only be called for ISD::INSERT_VECTOR_ELT");
10661
10662 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10663 // We have legal lowering for constant indices but not for variable ones.
10664 if (!C)
10665 return SDValue();
10666
10667 EVT VT = Op.getValueType();
10668 SDLoc dl(Op);
10669 SDValue V1 = Op.getOperand(0);
10670 SDValue V2 = Op.getOperand(1);
10671 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10672 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10673 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10674 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10675 unsigned InsertAtElement = C->getZExtValue();
10676 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10677 if (Subtarget.isLittleEndian()) {
10678 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10679 }
10680 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10681 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10682 }
10683 return Op;
10684}
10685
10686SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10687 SelectionDAG &DAG) const {
10688 SDLoc dl(Op);
10689 SDNode *N = Op.getNode();
10690
10691 assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
10692 "Unknown extract_vector_elt type");
10693
10694 SDValue Value = N->getOperand(0);
10695
10696 // The first part of this is like the store lowering except that we don't
10697 // need to track the chain.
10698
10699 // The values are now known to be -1 (false) or 1 (true). To convert this
10700 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
10701 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
10702 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
10703
10704 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
10705 // understand how to form the extending load.
10706 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
10707
10708 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
10709
10710 // Now convert to an integer and store.
10711 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
10712 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
10713 Value);
10714
10715 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10716 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10717 MachinePointerInfo PtrInfo =
10718 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
10719 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10720 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10721
10722 SDValue StoreChain = DAG.getEntryNode();
10723 SDValue Ops[] = {StoreChain,
10724 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
10725 Value, FIdx};
10726 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
10727
10728 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
10729 dl, VTs, Ops, MVT::v4i32, PtrInfo);
10730
10731 // Extract the value requested.
10732 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
10733 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
10734 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
10735
10736 SDValue IntVal =
10737 DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));
10738
10739 if (!Subtarget.useCRBits())
10740 return IntVal;
10741
10742 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
10743}
10744
10745/// Lowering for QPX v4i1 loads
10746SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10747 SelectionDAG &DAG) const {
10748 SDLoc dl(Op);
10749 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10750 SDValue LoadChain = LN->getChain();
10751 SDValue BasePtr = LN->getBasePtr();
10752
10753 if (Op.getValueType() == MVT::v4f64 ||
10754 Op.getValueType() == MVT::v4f32) {
10755 EVT MemVT = LN->getMemoryVT();
10756 unsigned Alignment = LN->getAlignment();
10757
10758 // If this load is properly aligned, then it is legal.
10759 if (Alignment >= MemVT.getStoreSize())
10760 return Op;
10761
10762 EVT ScalarVT = Op.getValueType().getScalarType(),
10763 ScalarMemVT = MemVT.getScalarType();
10764 unsigned Stride = ScalarMemVT.getStoreSize();
10765
10766 SDValue Vals[4], LoadChains[4];
10767 for (unsigned Idx = 0; Idx < 4; ++Idx) {
10768 SDValue Load;
10769 if (ScalarVT != ScalarMemVT)
10770 Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
10771 BasePtr,
10772 LN->getPointerInfo().getWithOffset(Idx * Stride),
10773 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
10774 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10775 else
10776 Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
10777 LN->getPointerInfo().getWithOffset(Idx * Stride),
10778 MinAlign(Alignment, Idx * Stride),
10779 LN->getMemOperand()->getFlags(), LN->getAAInfo());
10780
10781 if (Idx == 0 && LN->isIndexed()) {
10782 assert(LN->getAddressingMode() == ISD::PRE_INC &&
10783 "Unknown addressing mode on vector load");
10784 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
10785 LN->getAddressingMode());
10786 }
10787
10788 Vals[Idx] = Load;
10789 LoadChains[Idx] = Load.getValue(1);
10790
10791 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10792 DAG.getConstant(Stride, dl,
10793 BasePtr.getValueType()));
10794 }
10795
10796 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10797 SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);
10798
10799 if (LN->isIndexed()) {
10800 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
10801 return DAG.getMergeValues(RetOps, dl);
10802 }
10803
10804 SDValue RetOps[] = { Value, TF };
10805 return DAG.getMergeValues(RetOps, dl);
10806 }
10807
10808 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
10809 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
10810
10811 // To lower v4i1 from a byte array, we load the byte elements of the
10812 // vector and then reuse the BUILD_VECTOR logic.
10813
10814 SDValue VectElmts[4], VectElmtChains[4];
10815 for (unsigned i = 0; i < 4; ++i) {
10816 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
10817 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
10818
10819 VectElmts[i] = DAG.getExtLoad(
10820 ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
10821 LN->getPointerInfo().getWithOffset(i), MVT::i8,
10822 /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
10823 VectElmtChains[i] = VectElmts[i].getValue(1);
10824 }
10825
10826 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
10827 SDValue Value = DAG.getNode(PPCISD::QVLFSb, dl, MVT::v4i1, VectElmts);
10828
10829 SDValue RVals[] = { Value, LoadChain };
10830 return DAG.getMergeValues(RVals, dl);
10831}
10832
10833/// Lowering for QPX v4i1 stores
10834SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10835 SelectionDAG &DAG) const {
10836 SDLoc dl(Op);
10837 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10838 SDValue StoreChain = SN->getChain();
10839 SDValue BasePtr = SN->getBasePtr();
10840 SDValue Value = SN->getValue();
10841
10842 if (Value.getValueType() == MVT::v4f64 ||
10843 Value.getValueType() == MVT::v4f32) {
10844 EVT MemVT = SN->getMemoryVT();
10845 unsigned Alignment = SN->getAlignment();
10846
10847 // If this store is properly aligned, then it is legal.
10848 if (Alignment >= MemVT.getStoreSize())
10849 return Op;
10850
10851 EVT ScalarVT = Value.getValueType().getScalarType(),
10852 ScalarMemVT = MemVT.getScalarType();
10853 unsigned Stride = ScalarMemVT.getStoreSize();
10854
10855 SDValue Stores[4];
10856 for (unsigned Idx = 0; Idx < 4; ++Idx) {
10857 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
10858 DAG.getVectorIdxConstant(Idx, dl));
10859 SDValue Store;
10860 if (ScalarVT != ScalarMemVT)
10861 Store =
10862 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
10863 SN->getPointerInfo().getWithOffset(Idx * Stride),
10864 ScalarMemVT, MinAlign(Alignment, Idx * Stride),
10865 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10866 else
10867 Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
10868 SN->getPointerInfo().getWithOffset(Idx * Stride),
10869 MinAlign(Alignment, Idx * Stride),
10870 SN->getMemOperand()->getFlags(), SN->getAAInfo());
10871
10872 if (Idx == 0 && SN->isIndexed()) {
10873 assert(SN->getAddressingMode() == ISD::PRE_INC &&
10874 "Unknown addressing mode on vector store");
10875 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
10876 SN->getAddressingMode());
10877 }
10878
10879 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10880 DAG.getConstant(Stride, dl,
10881 BasePtr.getValueType()));
10882 Stores[Idx] = Store;
10883 }
10884
10885 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10886
10887 if (SN->isIndexed()) {
10888 SDValue RetOps[] = { TF, Stores[0].getValue(1) };
10889 return DAG.getMergeValues(RetOps, dl);
10890 }
10891
10892 return TF;
10893 }
10894
10895 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
10896 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
10897
10898 // The values are now known to be -1 (false) or 1 (true). To convert this
10899 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
10900 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
10901 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
10902
10903 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
10904 // understand how to form the extending load.
10905 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);
10906
10907 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
10908
10909 // Now convert to an integer and store.
10910 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
10911 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
10912 Value);
10913
10914 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10915 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10916 MachinePointerInfo PtrInfo =
10917 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
10918 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10919 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10920
10921 SDValue Ops[] = {StoreChain,
10922 DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
10923 Value, FIdx};
10924 SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);
10925
10926 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
10927 dl, VTs, Ops, MVT::v4i32, PtrInfo);
10928
10929 // Move data into the byte array.
10930 SDValue Loads[4], LoadChains[4];
10931 for (unsigned i = 0; i < 4; ++i) {
10932 unsigned Offset = 4*i;
10933 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
10934 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
10935
10936 Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
10937 PtrInfo.getWithOffset(Offset));
10938 LoadChains[i] = Loads[i].getValue(1);
10939 }
10940
10941 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10942
10943 SDValue Stores[4];
10944 for (unsigned i = 0; i < 4; ++i) {
10945 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
10946 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
10947
10948 Stores[i] = DAG.getTruncStore(
10949 StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
10950 MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
10951 SN->getAAInfo());
10952 }
10953
10954 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10955
10956 return StoreChain;
10957}
10958
10959SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10960 SDLoc dl(Op);
10961 if (Op.getValueType() == MVT::v4i32) {
10962 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10963
10964 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10965 // +16 as shift amt.
10966 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10967 SDValue RHSSwap = // = vrlw RHS, 16
10968 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10969
10970 // Shrinkify inputs to v8i16.
10971 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10972 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10973 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10974
10975 // Low parts multiplied together, generating 32-bit results (we ignore the
10976 // top parts).
10977 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10978 LHS, RHS, DAG, dl, MVT::v4i32);
10979
10980 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10981 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10982 // Shift the high parts up 16 bits.
10983 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10984 Neg16, DAG, dl);
10985 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
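// A sketch of the identity used above, splitting each 32-bit lane into
// 16-bit halves (all arithmetic mod 2^32):
//   x * y = xLo*yLo + ((xLo*yHi + xHi*yLo) << 16)
// vmulouh forms the xLo*yLo products, vmsumuhm on (LHS, vrlw(RHS, 16), 0)
// forms xLo*yHi + xHi*yLo per lane, and vslw shifts that sum up 16 bits.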
10986 } else if (Op.getValueType() == MVT::v16i8) {
10987 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10988 bool isLittleEndian = Subtarget.isLittleEndian();
10989
10990 // Multiply the even 8-bit parts, producing 16-bit sums.
10991 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10992 LHS, RHS, DAG, dl, MVT::v8i16);
10993 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10994
10995 // Multiply the odd 8-bit parts, producing 16-bit sums.
10996 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10997 LHS, RHS, DAG, dl, MVT::v8i16);
10998 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10999
11000 // Merge the results together. Because vmuleub and vmuloub are
11001 // instructions with a big-endian bias, we must reverse the
11002 // element numbering and reverse the meaning of "odd" and "even"
11003 // when generating little endian code.
11004 int Ops[16];
11005 for (unsigned i = 0; i != 8; ++i) {
11006 if (isLittleEndian) {
11007 Ops[i*2 ] = 2*i;
11008 Ops[i*2+1] = 2*i+16;
11009 } else {
11010 Ops[i*2 ] = 2*i+1;
11011 Ops[i*2+1] = 2*i+1+16;
11012 }
11013 }
11014 if (isLittleEndian)
11015 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11016 else
11017 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11018 } else {
11019 llvm_unreachable("Unknown mul to lower!");
11020 }
11021}
11022
11023SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
11024
11025 assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
11026
11027 EVT VT = Op.getValueType();
11028 assert(VT.isVector() &&
11029 "Only set vector abs as custom, scalar abs shouldn't reach here!");
11030 assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
11031 VT == MVT::v16i8) &&
11032 "Unexpected vector element type!");
11033 assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
11034 "Current subtarget doesn't support smax v2i64!");
11035
11036 // For vector abs, it can be lowered to:
11037 // abs x
11038 // ==>
11039 // y = -x
11040 // smax(x, y)
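11040a // (For example, x = -5 gives y = 5 and smax(-5, 5) = 5; the INT_MIN lane
11040b // wraps to itself, matching ISD::ABS semantics.)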
11041
11042 SDLoc dl(Op);
11043 SDValue X = Op.getOperand(0);
11044 SDValue Zero = DAG.getConstant(0, dl, VT);
11045 SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
11046
11047 // SMAX patch https://reviews.llvm.org/D47332
11048 // hasn't landed yet, so use intrinsic first here.
11049 // TODO: Should use SMAX directly once SMAX patch landed
11050 Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
11051 if (VT == MVT::v2i64)
11052 BifID = Intrinsic::ppc_altivec_vmaxsd;
11053 else if (VT == MVT::v8i16)
11054 BifID = Intrinsic::ppc_altivec_vmaxsh;
11055 else if (VT == MVT::v16i8)
11056 BifID = Intrinsic::ppc_altivec_vmaxsb;
11057
11058 return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
11059}
11060
11061 // Custom lowering for fpext v2f32 to v2f64
11062SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11063
11064 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11065 "Should only be called for ISD::FP_EXTEND");
11066
11067 // FIXME: handle extends from half precision float vectors on P9.
11068 // We only want to custom lower an extend from v2f32 to v2f64.
11069 if (Op.getValueType() != MVT::v2f64 ||
11070 Op.getOperand(0).getValueType() != MVT::v2f32)
11071 return SDValue();
11072
11073 SDLoc dl(Op);
11074 SDValue Op0 = Op.getOperand(0);
11075
11076 switch (Op0.getOpcode()) {
11077 default:
11078 return SDValue();
11079 case ISD::EXTRACT_SUBVECTOR: {
11080 assert(Op0.getNumOperands() == 2 &&
11081 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11082 "Node should have 2 operands with second one being a constant!");
11083
11084 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11085 return SDValue();
11086
11087 // Custom lower is only done for high or low doubleword.
11088 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11089 if (Idx % 2 != 0)
11090 return SDValue();
11091
11092 // Since input is v4f32, at this point Idx is either 0 or 2.
11093 // Shift to get the doubleword position we want.
11094 int DWord = Idx >> 1;
11095
11096 // High and low word positions are different on little endian.
11097 if (Subtarget.isLittleEndian())
11098 DWord ^= 0x1;
11099
11100 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11101 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11102 }
11103 case ISD::FADD:
11104 case ISD::FMUL:
11105 case ISD::FSUB: {
11106 SDValue NewLoad[2];
11107 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11108 // Ensure both inputs are loads.
11109 SDValue LdOp = Op0.getOperand(i);
11110 if (LdOp.getOpcode() != ISD::LOAD)
11111 return SDValue();
11112 // Generate new load node.
11113 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11114 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11115 NewLoad[i] = DAG.getMemIntrinsicNode(
11116 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11117 LD->getMemoryVT(), LD->getMemOperand());
11118 }
11119 SDValue NewOp =
11120 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11121 NewLoad[1], Op0.getNode()->getFlags());
11122 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11123 DAG.getConstant(0, dl, MVT::i32));
11124 }
11125 case ISD::LOAD: {
11126 LoadSDNode *LD = cast<LoadSDNode>(Op0.getNode());
11127 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11128 SDValue NewLd = DAG.getMemIntrinsicNode(
11129 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11130 LD->getMemoryVT(), LD->getMemOperand());
11131 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11132 DAG.getConstant(0, dl, MVT::i32));
11133 }
11134 }
11135 llvm_unreachable("ERROR: Should return for all cases within switch.");
11136}
11137
11138/// LowerOperation - Provide custom lowering hooks for some operations.
11139///
11140 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11141 switch (Op.getOpcode()) {
11142 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11143 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11144 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11145 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11146 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11147 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11148 case ISD::SETCC: return LowerSETCC(Op, DAG);
11149 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11150 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11151
11152 // Variable argument lowering.
11153 case ISD::VASTART: return LowerVASTART(Op, DAG);
11154 case ISD::VAARG: return LowerVAARG(Op, DAG);
11155 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11156
11157 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11158 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11159 case ISD::GET_DYNAMIC_AREA_OFFSET:
11160 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11161
11162 // Exception handling lowering.
11163 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11164 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11165 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11166
11167 case ISD::LOAD: return LowerLOAD(Op, DAG);
11168 case ISD::STORE: return LowerSTORE(Op, DAG);
11169 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11170 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11171 case ISD::FP_TO_UINT:
11172 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11173 case ISD::UINT_TO_FP:
11174 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11175 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
11176
11177 // Lower 64-bit shifts.
11178 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11179 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11180 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11181
11182 // Vector-related lowering.
11183 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11184 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11185 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11186 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11187 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
11188 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11189 case ISD::MUL: return LowerMUL(Op, DAG);
11190 case ISD::ABS: return LowerABS(Op, DAG);
11191 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11192 case ISD::ROTL: return LowerROTL(Op, DAG);
11193
11194 // For counter-based loop handling.
11195 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11196
11197 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11198
11199 // Frame & Return address.
11200 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11201 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11202
11203 case ISD::INTRINSIC_VOID:
11204 return LowerINTRINSIC_VOID(Op, DAG);
11205 case ISD::BSWAP:
11206 return LowerBSWAP(Op, DAG);
11207 case ISD::ATOMIC_CMP_SWAP:
11208 return LowerATOMIC_CMP_SWAP(Op, DAG);
11209 }
11210}
11211
11212 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11213 SmallVectorImpl<SDValue> &Results,
11214 SelectionDAG &DAG) const {
11215 SDLoc dl(N);
11216 switch (N->getOpcode()) {
11217 default:
11218 llvm_unreachable("Do not know how to custom type legalize this operation!");
11219 case ISD::READCYCLECOUNTER: {
11220 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11221 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11222
11223 Results.push_back(
11224 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11225 Results.push_back(RTB.getValue(2));
11226 break;
11227 }
11228 case ISD::INTRINSIC_W_CHAIN: {
11229 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11230 Intrinsic::loop_decrement)
11231 break;
11232
11233 assert(N->getValueType(0) == MVT::i1 &&
11234 "Unexpected result type for CTR decrement intrinsic");
11235 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11236 N->getValueType(0));
11237 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11238 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11239 N->getOperand(1));
11240
11241 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11242 Results.push_back(NewInt.getValue(1));
11243 break;
11244 }
11245 case ISD::VAARG: {
11246 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11247 return;
11248
11249 EVT VT = N->getValueType(0);
11250
11251 if (VT == MVT::i64) {
11252 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11253
11254 Results.push_back(NewNode);
11255 Results.push_back(NewNode.getValue(1));
11256 }
11257 return;
11258 }
11259 case ISD::FP_TO_SINT:
11260 case ISD::FP_TO_UINT:
11261 // LowerFP_TO_INT() can only handle f32 and f64.
11262 if (N->getOperand(0).getValueType() == MVT::ppcf128)
11263 return;
11264 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11265 return;
11266 case ISD::TRUNCATE: {
11267 EVT TrgVT = N->getValueType(0);
11268 EVT OpVT = N->getOperand(0).getValueType();
11269 if (TrgVT.isVector() &&
11270 isOperationCustom(N->getOpcode(), TrgVT) &&
11271 OpVT.getSizeInBits() <= 128 &&
11272 isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
11273 Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
11274 return;
11275 }
11276 case ISD::BITCAST:
11277 // Don't handle bitcast here.
11278 return;
11279 case ISD::FP_EXTEND:
11280 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11281 if (Lowered)
11282 Results.push_back(Lowered);
11283 return;
11284 }
11285}
11286
11287//===----------------------------------------------------------------------===//
11288// Other Lowering Code
11289//===----------------------------------------------------------------------===//
11290
11291 static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11292 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11293 Function *Func = Intrinsic::getDeclaration(M, Id);
11294 return Builder.CreateCall(Func, {});
11295}
11296
11297// The mappings for emitLeading/TrailingFence is taken from
11298// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11299 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11300 Instruction *Inst,
11301 AtomicOrdering Ord) const {
11302 if (Ord == AtomicOrdering::SequentiallyConsistent)
11303 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11304 if (isReleaseOrStronger(Ord))
11305 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11306 return nullptr;
11307}
11308
11309 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11310 Instruction *Inst,
11311 AtomicOrdering Ord) const {
11312 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11313 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11314 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11315 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11316 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11317 return Builder.CreateCall(
11318 Intrinsic::getDeclaration(
11319 Builder.GetInsertBlock()->getParent()->getParent(),
11320 Intrinsic::ppc_cfence, {Inst->getType()}),
11321 {Inst});
11322 // FIXME: Can use isync for rmw operation.
11323 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11324 }
11325 return nullptr;
11326}
11327
11328 MachineBasicBlock *
11329 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11330 unsigned AtomicSize,
11331 unsigned BinOpcode,
11332 unsigned CmpOpcode,
11333 unsigned CmpPred) const {
11334 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11335 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11336
11337 auto LoadMnemonic = PPC::LDARX;
11338 auto StoreMnemonic = PPC::STDCX;
11339 switch (AtomicSize) {
11340 default:
11341 llvm_unreachable("Unexpected size of atomic entity");
11342 case 1:
11343 LoadMnemonic = PPC::LBARX;
11344 StoreMnemonic = PPC::STBCX;
11345 assert(Subtarget.hasPartwordAtomics() && "Size < 4 requires partword atomics");
11346 break;
11347 case 2:
11348 LoadMnemonic = PPC::LHARX;
11349 StoreMnemonic = PPC::STHCX;
11350 assert(Subtarget.hasPartwordAtomics() && "Size < 4 requires partword atomics");
11351 break;
11352 case 4:
11353 LoadMnemonic = PPC::LWARX;
11354 StoreMnemonic = PPC::STWCX;
11355 break;
11356 case 8:
11357 LoadMnemonic = PPC::LDARX;
11358 StoreMnemonic = PPC::STDCX;
11359 break;
11360 }
11361
11362 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11363 MachineFunction *F = BB->getParent();
11364 MachineFunction::iterator It = ++BB->getIterator();
11365
11366 Register dest = MI.getOperand(0).getReg();
11367 Register ptrA = MI.getOperand(1).getReg();
11368 Register ptrB = MI.getOperand(2).getReg();
11369 Register incr = MI.getOperand(3).getReg();
11370 DebugLoc dl = MI.getDebugLoc();
11371
11372 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11373 MachineBasicBlock *loop2MBB =
11374 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11375 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11376 F->insert(It, loopMBB);
11377 if (CmpOpcode)
11378 F->insert(It, loop2MBB);
11379 F->insert(It, exitMBB);
11380 exitMBB->splice(exitMBB->begin(), BB,
11381 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11382 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11383
11384 MachineRegisterInfo &RegInfo = F->getRegInfo();
11385 Register TmpReg = (!BinOpcode) ? incr :
11386 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11387 : &PPC::GPRCRegClass);
11388
11389 // thisMBB:
11390 // ...
11391 // fallthrough --> loopMBB
11392 BB->addSuccessor(loopMBB);
11393
11394 // loopMBB:
11395 // l[wd]arx dest, ptr
11396 // add r0, dest, incr
11397 // st[wd]cx. r0, ptr
11398 // bne- loopMBB
11399 // fallthrough --> exitMBB
11400
11401 // For max/min...
11402 // loopMBB:
11403 // l[wd]arx dest, ptr
11404 // cmpl?[wd] incr, dest
11405 // bgt exitMBB
11406 // loop2MBB:
11407 // st[wd]cx. dest, ptr
11408 // bne- loopMBB
11409 // fallthrough --> exitMBB
11410
11411 BB = loopMBB;
11412 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11413 .addReg(ptrA).addReg(ptrB);
11414 if (BinOpcode)
11415 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11416 if (CmpOpcode) {
11417 // Signed comparisons of byte or halfword values must be sign-extended.
11418 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11419 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11420 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11421 ExtReg).addReg(dest);
11422 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11423 .addReg(incr).addReg(ExtReg);
11424 } else
11425 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11426 .addReg(incr).addReg(dest);
11427
11428 BuildMI(BB, dl, TII->get(PPC::BCC))
11429 .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11430 BB->addSuccessor(loop2MBB);
11431 BB->addSuccessor(exitMBB);
11432 BB = loop2MBB;
11433 }
11434 BuildMI(BB, dl, TII->get(StoreMnemonic))
11435 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11436 BuildMI(BB, dl, TII->get(PPC::BCC))
11437 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11438 BB->addSuccessor(loopMBB);
11439 BB->addSuccessor(exitMBB);
11440
11441 // exitMBB:
11442 // ...
11443 BB = exitMBB;
11444 return BB;
11445}
11446
11447 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11448 MachineInstr &MI, MachineBasicBlock *BB,
11449 bool is8bit, // operation
11450 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11451 // If we support part-word atomic mnemonics, just use them
11452 if (Subtarget.hasPartwordAtomics())
11453 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11454 CmpPred);
11455
11456 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11457 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11458 // In 64 bit mode we have to use 64 bits for addresses, even though the
11459 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11460 // registers without caring whether they're 32 or 64, but here we're
11461 // doing actual arithmetic on the addresses.
11462 bool is64bit = Subtarget.isPPC64();
11463 bool isLittleEndian = Subtarget.isLittleEndian();
11464 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11465
11466 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11467 MachineFunction *F = BB->getParent();
11468 MachineFunction::iterator It = ++BB->getIterator();
11469
11470 Register dest = MI.getOperand(0).getReg();
11471 Register ptrA = MI.getOperand(1).getReg();
11472 Register ptrB = MI.getOperand(2).getReg();
11473 Register incr = MI.getOperand(3).getReg();
11474 DebugLoc dl = MI.getDebugLoc();
11475
11476 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11477 MachineBasicBlock *loop2MBB =
11478 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11479 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11480 F->insert(It, loopMBB);
11481 if (CmpOpcode)
11482 F->insert(It, loop2MBB);
11483 F->insert(It, exitMBB);
11484 exitMBB->splice(exitMBB->begin(), BB,
11485 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11486 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11487
11488 MachineRegisterInfo &RegInfo = F->getRegInfo();
11489 const TargetRegisterClass *RC =
11490 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11491 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11492
11493 Register PtrReg = RegInfo.createVirtualRegister(RC);
11494 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11495 Register ShiftReg =
11496 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11497 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11498 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11499 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11500 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11501 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11502 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11503 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11504 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11505 Register Ptr1Reg;
11506 Register TmpReg =
11507 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11508
11509 // thisMBB:
11510 // ...
11511 // fallthrough --> loopMBB
11512 BB->addSuccessor(loopMBB);
11513
11514 // The 4-byte load must be aligned, while a char or short may be
11515 // anywhere in the word. Hence all this nasty bookkeeping code.
11516 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11517 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11518 // xori shift, shift1, 24 [16]
11519 // rlwinm ptr, ptr1, 0, 0, 29
11520 // slw incr2, incr, shift
11521 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11522 // slw mask, mask2, shift
11523 // loopMBB:
11524 // lwarx tmpDest, ptr
11525 // add tmp, tmpDest, incr2
11526 // andc tmp2, tmpDest, mask
11527 // and tmp3, tmp, mask
11528 // or tmp4, tmp3, tmp2
11529 // stwcx. tmp4, ptr
11530 // bne- loopMBB
11531 // fallthrough --> exitMBB
11532 // srw dest, tmpDest, shift
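// For example, an 8-bit operand whose address satisfies (ptr & 3) == 1
// yields shift1 = 8; on little-endian shift = 8, while on big-endian the
// xori gives shift = 8 ^ 24 = 16, selecting the same byte lane of the word.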
11533 if (ptrA != ZeroReg) {
11534 Ptr1Reg = RegInfo.createVirtualRegister(RC);
11535 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11536 .addReg(ptrA)
11537 .addReg(ptrB);
11538 } else {
11539 Ptr1Reg = ptrB;
11540 }
11541 // We need to use a 32-bit subregister here to avoid a register-class
11542 // mismatch in 64-bit mode.
11543 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11544 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11545 .addImm(3)
11546 .addImm(27)
11547 .addImm(is8bit ? 28 : 27);
11548 if (!isLittleEndian)
11549 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11550 .addReg(Shift1Reg)
11551 .addImm(is8bit ? 24 : 16);
11552 if (is64bit)
11553 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11554 .addReg(Ptr1Reg)
11555 .addImm(0)
11556 .addImm(61);
11557 else
11558 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11559 .addReg(Ptr1Reg)
11560 .addImm(0)
11561 .addImm(0)
11562 .addImm(29);
11563 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11564 if (is8bit)
11565 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11566 else {
11567 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11568 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11569 .addReg(Mask3Reg)
11570 .addImm(65535);
11571 }
11572 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11573 .addReg(Mask2Reg)
11574 .addReg(ShiftReg);
11575
11576 BB = loopMBB;
11577 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11578 .addReg(ZeroReg)
11579 .addReg(PtrReg);
11580 if (BinOpcode)
11581 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11582 .addReg(Incr2Reg)
11583 .addReg(TmpDestReg);
11584 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11585 .addReg(TmpDestReg)
11586 .addReg(MaskReg);
11587 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11588 if (CmpOpcode) {
11589 // For unsigned comparisons, we can directly compare the shifted values.
11590 // For signed comparisons we shift and sign extend.
11591 Register SReg = RegInfo.createVirtualRegister(GPRC);
11592 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11593 .addReg(TmpDestReg)
11594 .addReg(MaskReg);
11595 unsigned ValueReg = SReg;
11596 unsigned CmpReg = Incr2Reg;
11597 if (CmpOpcode == PPC::CMPW) {
11598 ValueReg = RegInfo.createVirtualRegister(GPRC);
11599 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11600 .addReg(SReg)
11601 .addReg(ShiftReg);
11602 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11603 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11604 .addReg(ValueReg);
11605 ValueReg = ValueSReg;
11606 CmpReg = incr;
11607 }
11608 BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11609 .addReg(CmpReg)
11610 .addReg(ValueReg);
11611 BuildMI(BB, dl, TII->get(PPC::BCC))
11612 .addImm(CmpPred)
11613 .addReg(PPC::CR0)
11614 .addMBB(exitMBB);
11615 BB->addSuccessor(loop2MBB);
11616 BB->addSuccessor(exitMBB);
11617 BB = loop2MBB;
11618 }
11619 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11620 BuildMI(BB, dl, TII->get(PPC::STWCX))
11621 .addReg(Tmp4Reg)
11622 .addReg(ZeroReg)
11623 .addReg(PtrReg);
11624 BuildMI(BB, dl, TII->get(PPC::BCC))
11625 .addImm(PPC::PRED_NE)
11626 .addReg(PPC::CR0)
11627 .addMBB(loopMBB);
11628 BB->addSuccessor(loopMBB);
11629 BB->addSuccessor(exitMBB);
11630
11631 // exitMBB:
11632 // ...
11633 BB = exitMBB;
11634 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11635 .addReg(TmpDestReg)
11636 .addReg(ShiftReg);
11637 return BB;
11638}
11639
11640 MachineBasicBlock *
11641 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11642 MachineBasicBlock *MBB) const {
11643 DebugLoc DL = MI.getDebugLoc();
11644 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11645 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11646
11647 MachineFunction *MF = MBB->getParent();
11648 MachineRegisterInfo &MRI = MF->getRegInfo();
11649
11650 const BasicBlock *BB = MBB->getBasicBlock();
11651 MachineFunction::iterator I = ++MBB->getIterator();
11652
11653 Register DstReg = MI.getOperand(0).getReg();
11654 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11655 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11656 Register mainDstReg = MRI.createVirtualRegister(RC);
11657 Register restoreDstReg = MRI.createVirtualRegister(RC);
11658
11659 MVT PVT = getPointerTy(MF->getDataLayout());
11660 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11661 "Invalid Pointer Size!");
11662 // For v = setjmp(buf), we generate
11663 //
11664 // thisMBB:
11665 // SjLjSetup mainMBB
11666 // bl mainMBB
11667 // v_restore = 1
11668 // b sinkMBB
11669 //
11670 // mainMBB:
11671 // buf[LabelOffset] = LR
11672 // v_main = 0
11673 //
11674 // sinkMBB:
11675 // v = phi(main, restore)
11676 //
11677
11678 MachineBasicBlock *thisMBB = MBB;
11679 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11680 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11681 MF->insert(I, mainMBB);
11682 MF->insert(I, sinkMBB);
11683
11684 MachineInstrBuilder MIB;
11685
11686 // Transfer the remainder of BB and its successor edges to sinkMBB.
11687 sinkMBB->splice(sinkMBB->begin(), MBB,
11688 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11689 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11690
11691 // Note that the structure of the jmp_buf used here is not compatible
11692 // with that used by libc, and is not designed to be. Specifically, it
11693 // stores only those 'reserved' registers that LLVM does not otherwise
11694 // understand how to spill. Also, by convention, by the time this
11695 // intrinsic is called, Clang has already stored the frame address in the
11696 // first slot of the buffer and stack address in the third. Following the
11697 // X86 target code, we'll store the jump address in the second slot. We also
11698 // need to save the TOC pointer (R2) to handle jumps between shared
11699 // libraries, and that will be stored in the fourth slot. The thread
11700 // identifier (R13) is not affected.
11701
11702 // thisMBB:
11703 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11704 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11705 const int64_t BPOffset = 4 * PVT.getStoreSize();
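// The resulting buffer layout, in pointer-sized slots: buf[0] = frame
// address (stored by Clang), buf[1] = resume IP, buf[2] = stack address
// (stored by Clang), buf[3] = TOC (R2), buf[4] = base pointer.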
11706
11707 // Prepare the IP in a register.
11708 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11709 Register LabelReg = MRI.createVirtualRegister(PtrRC);
11710 Register BufReg = MI.getOperand(1).getReg();
11711
11712 if (Subtarget.is64BitELFABI()) {
11713 setUsesTOCBasePtr(*MBB->getParent());
11714 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11715 .addReg(PPC::X2)
11716 .addImm(TOCOffset)
11717 .addReg(BufReg)
11718 .cloneMemRefs(MI);
11719 }
11720
11721 // Naked functions never have a base pointer, and so we use r1. For all
11722 // other functions, this decision must be deferred until PEI.
11723 unsigned BaseReg;
11724 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11725 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11726 else
11727 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11728
11729 MIB = BuildMI(*thisMBB, MI, DL,
11730 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11731 .addReg(BaseReg)
11732 .addImm(BPOffset)
11733 .addReg(BufReg)
11734 .cloneMemRefs(MI);
11735
11736 // Setup
11737 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11738 MIB.addRegMask(TRI->getNoPreservedMask());
11739
11740 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11741
11742 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11743 .addMBB(mainMBB);
11744 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11745
11746 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11747 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11748
11749 // mainMBB:
11750 // mainDstReg = 0
11751 MIB =
11752 BuildMI(mainMBB, DL,
11753 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11754
11755 // Store IP
11756 if (Subtarget.isPPC64()) {
11757 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11758 .addReg(LabelReg)
11759 .addImm(LabelOffset)
11760 .addReg(BufReg);
11761 } else {
11762 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11763 .addReg(LabelReg)
11764 .addImm(LabelOffset)
11765 .addReg(BufReg);
11766 }
11767 MIB.cloneMemRefs(MI);
11768
11769 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11770 mainMBB->addSuccessor(sinkMBB);
11771
11772 // sinkMBB:
11773 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11774 TII->get(PPC::PHI), DstReg)
11775 .addReg(mainDstReg).addMBB(mainMBB)
11776 .addReg(restoreDstReg).addMBB(thisMBB);
11777
11778 MI.eraseFromParent();
11779 return sinkMBB;
11780}
11781
11782 MachineBasicBlock *
11783 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11784 MachineBasicBlock *MBB) const {
11785 DebugLoc DL = MI.getDebugLoc();
11786 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11787
11788 MachineFunction *MF = MBB->getParent();
11789 MachineRegisterInfo &MRI = MF->getRegInfo();
11790
11791 MVT PVT = getPointerTy(MF->getDataLayout());
11792 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11793 "Invalid Pointer Size!");
11794
11795 const TargetRegisterClass *RC =
11796 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11797 Register Tmp = MRI.createVirtualRegister(RC);
11798 // Since FP is only updated here but NOT referenced, it's treated as GPR.
11799 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11800 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11801 unsigned BP =
11802 (PVT == MVT::i64)
11803 ? PPC::X30
11804 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11805 : PPC::R30);
11806
11807 MachineInstrBuilder MIB;
11808
11809 const int64_t LabelOffset = 1 * PVT.getStoreSize();
11810 const int64_t SPOffset = 2 * PVT.getStoreSize();
11811 const int64_t TOCOffset = 3 * PVT.getStoreSize();
11812 const int64_t BPOffset = 4 * PVT.getStoreSize();
11813
11814 Register BufReg = MI.getOperand(0).getReg();
11815
11816 // Reload FP (the jumped-to function may not have had a
11817 // frame pointer, and if so, then its r31 will be restored
11818 // as necessary).
11819 if (PVT == MVT::i64) {
11820 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11821 .addImm(0)
11822 .addReg(BufReg);
11823 } else {
11824 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11825 .addImm(0)
11826 .addReg(BufReg);
11827 }
11828 MIB.cloneMemRefs(MI);
11829
11830 // Reload IP
11831 if (PVT == MVT::i64) {
11832 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11833 .addImm(LabelOffset)
11834 .addReg(BufReg);
11835 } else {
11836 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11837 .addImm(LabelOffset)
11838 .addReg(BufReg);
11839 }
11840 MIB.cloneMemRefs(MI);
11841
11842 // Reload SP
11843 if (PVT == MVT::i64) {
11844 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11845 .addImm(SPOffset)
11846 .addReg(BufReg);
11847 } else {
11848 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11849 .addImm(SPOffset)
11850 .addReg(BufReg);
11851 }
11852 MIB.cloneMemRefs(MI);
11853
11854 // Reload BP
11855 if (PVT == MVT::i64) {
11856 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11857 .addImm(BPOffset)
11858 .addReg(BufReg);
11859 } else {
11860 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11861 .addImm(BPOffset)
11862 .addReg(BufReg);
11863 }
11864 MIB.cloneMemRefs(MI);
11865
11866 // Reload TOC
11867 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11868 setUsesTOCBasePtr(*MBB->getParent());
11869 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11870 .addImm(TOCOffset)
11871 .addReg(BufReg)
11872 .cloneMemRefs(MI);
11873 }
11874
11875 // Jump
11876 BuildMI(*MBB, MI, DL,
11877 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11878 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11879
11880 MI.eraseFromParent();
11881 return MBB;
11882}
11883
11884 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11885 // If the function specifically requests inline stack probes, emit them.
11886 if (MF.getFunction().hasFnAttribute("probe-stack"))
11887 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11888 "inline-asm";
11889 return false;
11890}
11891
11892 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11893 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11894 unsigned StackAlign = TFI->getStackAlignment();
11895 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11896 "Unexpected stack alignment");
11897 // The default stack probe size is 4096 if the function has no
11898 // stack-probe-size attribute.
11899 unsigned StackProbeSize = 4096;
11900 const Function &Fn = MF.getFunction();
11901 if (Fn.hasFnAttribute("stack-probe-size"))
11902 Fn.getFnAttribute("stack-probe-size")
11903 .getValueAsString()
11904 .getAsInteger(0, StackProbeSize);
11905 // Round down to the stack alignment.
11906 StackProbeSize &= ~(StackAlign - 1);
11907 return StackProbeSize ? StackProbeSize : StackAlign;
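// For example, with 16-byte stack alignment an attribute value of 4100
// rounds down to 4096, while a value smaller than the alignment rounds to 0
// and falls back to the alignment itself via the ternary above.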
11908}
11909
11910 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11911// into three phases. In the first phase, it uses pseudo instruction
11912// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11913// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11914 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future
11915 // result of MaxCallFrameSize so that it can calculate the correct data area pointer.
11916 MachineBasicBlock *
11917 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11918 MachineBasicBlock *MBB) const {
11919 const bool isPPC64 = Subtarget.isPPC64();
11920 MachineFunction *MF = MBB->getParent();
11921 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11922 DebugLoc DL = MI.getDebugLoc();
11923 const unsigned ProbeSize = getStackProbeSize(*MF);
11924 const BasicBlock *ProbedBB = MBB->getBasicBlock();
11925 MachineRegisterInfo &MRI = MF->getRegInfo();
11926 // The CFG of the probing loop looks like this:
11927 // +-----+
11928 // | MBB |
11929 // +--+--+
11930 // |
11931 // +----v----+
11932 // +--->+ TestMBB +---+
11933 // | +----+----+ |
11934 // | | |
11935 // | +-----v----+ |
11936 // +---+ BlockMBB | |
11937 // +----------+ |
11938 // |
11939 // +---------+ |
11940 // | TailMBB +<--+
11941 // +---------+
11942 // In MBB, calculate previous frame pointer and final stack pointer.
11943 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11944 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11945 // TailMBB is spliced via \p MI.
11946 MachineFunction::iterator MBBIter = ++MBB->getIterator();
11947 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11948 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11949 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11950
11951 MF->insert(MBBIter, TestMBB);
11952 MF->insert(MBBIter, BlockMBB);
11953 MF->insert(MBBIter, TailMBB);
11954
11955 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11956 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11957
11958 Register DstReg = MI.getOperand(0).getReg();
11959 Register NegSizeReg = MI.getOperand(1).getReg();
11960 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11961 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11962 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11963 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11964
11965 // Since the value of NegSizeReg might be realigned during prolog/epilog
11966 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11967 // actual FramePointer and NegSize.
11968 unsigned ProbeOpc;
11969 if (!MRI.hasOneNonDBGUse(NegSizeReg))
11970 ProbeOpc =
11971 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11972 else
11973 // When NegSizeReg has only one use (the current MI, which will be replaced
11974 // by PREPARE_PROBED_ALLOCA), the _NEGSIZE_SAME_REG variants let
11975 // ActualNegSizeReg and NegSizeReg be allocated to the same physical
11976 // register, avoiding a redundant copy.
11977 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11978 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11979 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11980 .addDef(ActualNegSizeReg)
11981 .addReg(NegSizeReg)
11982 .add(MI.getOperand(2))
11983 .add(MI.getOperand(3));
11984
11985 // Calculate final stack pointer, which equals to SP + ActualNegSize.
11986 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11987 FinalStackPtr)
11988 .addReg(SPReg)
11989 .addReg(ActualNegSizeReg);
11990
11991 // Materialize a scratch register for update.
11992 int64_t NegProbeSize = -(int64_t)ProbeSize;
11993 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11994 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11995 if (!isInt<16>(NegProbeSize)) {
11996 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11997 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11998 .addImm(NegProbeSize >> 16);
11999 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12000 ScratchReg)
12001 .addReg(TempReg)
12002 .addImm(NegProbeSize & 0xFFFF);
12003 } else
12004 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12005 .addImm(NegProbeSize);
12006
12007 {
12008 // Probing leading residual part.
12009 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12010 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12011 .addReg(ActualNegSizeReg)
12012 .addReg(ScratchReg);
12013 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12014 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12015 .addReg(Div)
12016 .addReg(ScratchReg);
12017 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12018 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12019 .addReg(Mul)
12020 .addReg(ActualNegSizeReg);
12021 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12022 .addReg(FramePointer)
12023 .addReg(SPReg)
12024 .addReg(NegMod);
12025 }
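// A worked example, assuming ProbeSize = 4096 and a 5000-byte allocation:
// ActualNegSize = -5000, Div = -5000 / -4096 = 1, Mul = 1 * -4096 = -4096,
// NegMod = -5000 - (-4096) = -904, so the stdux/stwux above probes the
// 904-byte residue and the loop below probes the remaining full block.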
12026
12027 {
12028 // Remaining part should be multiple of ProbeSize.
12029 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12030 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12031 .addReg(SPReg)
12032 .addReg(FinalStackPtr);
12033 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12034 .addImm(PPC::PRED_EQ)
12035 .addReg(CmpResult)
12036 .addMBB(TailMBB);
12037 TestMBB->addSuccessor(BlockMBB);
12038 TestMBB->addSuccessor(TailMBB);
12039 }
12040
12041 {
12042 // Touch the block.
12043 // |P...|P...|P...
12044 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12045 .addReg(FramePointer)
12046 .addReg(SPReg)
12047 .addReg(ScratchReg);
12048 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12049 BlockMBB->addSuccessor(TestMBB);
12050 }
12051
12052 // Calculation of MaxCallFrameSize is deferred to prolog/epilog insertion;
12053 // use the DYNAREAOFFSET pseudo instruction to get the future result.
12054 Register MaxCallFrameSizeReg =
12055 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12056 BuildMI(TailMBB, DL,
12057 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12058 MaxCallFrameSizeReg)
12059 .add(MI.getOperand(2))
12060 .add(MI.getOperand(3));
12061 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12062 .addReg(SPReg)
12063 .addReg(MaxCallFrameSizeReg);
12064
12065 // Splice instructions after MI to TailMBB.
12066 TailMBB->splice(TailMBB->end(), MBB,
12067 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12068 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12069 MBB->addSuccessor(TestMBB);
12070
12071 // Delete the pseudo instruction.
12072 MI.eraseFromParent();
12073
12074 ++NumDynamicAllocaProbed;
12075 return TailMBB;
12076}
12077
12078 MachineBasicBlock *
12079 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12080 MachineBasicBlock *BB) const {
12081 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12082 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12083 if (Subtarget.is64BitELFABI() &&
12084 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12085 !Subtarget.isUsingPCRelativeCalls()) {
12086 // Call lowering should have added an r2 operand to indicate a dependence
12087 // on the TOC base pointer value. It can't, however, because there is no
12088 // way to mark the dependence as implicit there, and so the stackmap code
12089 // will confuse it with a regular operand. Instead, add the dependence
12090 // here.
12091 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12092 }
12093
12094 return emitPatchPoint(MI, BB);
12095 }
12096
12097 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12098 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12099 return emitEHSjLjSetJmp(MI, BB);
12100 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12101 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12102 return emitEHSjLjLongJmp(MI, BB);
12103 }
12104
12105 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12106
12107 // To "insert" these instructions we actually have to insert their
12108 // control-flow patterns.
12109 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12110 MachineFunction::iterator It = ++BB->getIterator();
12111
12112 MachineFunction *F = BB->getParent();
12113
12114 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12115 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12116 MI.getOpcode() == PPC::SELECT_I8) {
12117 SmallVector<MachineOperand, 2> Cond;
12118 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12119 MI.getOpcode() == PPC::SELECT_CC_I8)
12120 Cond.push_back(MI.getOperand(4));
12121 else
12122 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12123 Cond.push_back(MI.getOperand(1));
12124
12125 DebugLoc dl = MI.getDebugLoc();
12126 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12127 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12128 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12129 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12130 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12131 MI.getOpcode() == PPC::SELECT_CC_QFRC ||
12132 MI.getOpcode() == PPC::SELECT_CC_QSRC ||
12133 MI.getOpcode() == PPC::SELECT_CC_QBRC ||
12134 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12135 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12136 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12137 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12138 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12139 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12140 MI.getOpcode() == PPC::SELECT_F4 ||
12141 MI.getOpcode() == PPC::SELECT_F8 ||
12142 MI.getOpcode() == PPC::SELECT_F16 ||
12143 MI.getOpcode() == PPC::SELECT_QFRC ||
12144 MI.getOpcode() == PPC::SELECT_QSRC ||
12145 MI.getOpcode() == PPC::SELECT_QBRC ||
12146 MI.getOpcode() == PPC::SELECT_SPE ||
12147 MI.getOpcode() == PPC::SELECT_SPE4 ||
12148 MI.getOpcode() == PPC::SELECT_VRRC ||
12149 MI.getOpcode() == PPC::SELECT_VSFRC ||
12150 MI.getOpcode() == PPC::SELECT_VSSRC ||
12151 MI.getOpcode() == PPC::SELECT_VSRC) {
12152 // The incoming instruction knows the destination vreg to set, the
12153 // condition code register to branch on, the true/false values to
12154 // select between, and a branch opcode to use.
12155
12156 // thisMBB:
12157 // ...
12158 // TrueVal = ...
12159 // cmpTY ccX, r1, r2
12160 // bCC copy1MBB
12161 // fallthrough --> copy0MBB
12162 MachineBasicBlock *thisMBB = BB;
12163 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12164 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12165 DebugLoc dl = MI.getDebugLoc();
12166 F->insert(It, copy0MBB);
12167 F->insert(It, sinkMBB);
12168
12169 // Transfer the remainder of BB and its successor edges to sinkMBB.
12170 sinkMBB->splice(sinkMBB->begin(), BB,
12171 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12172 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12173
12174 // Next, add the true and fallthrough blocks as its successors.
12175 BB->addSuccessor(copy0MBB);
12176 BB->addSuccessor(sinkMBB);
12177
12178 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12179 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12180 MI.getOpcode() == PPC::SELECT_F16 ||
12181 MI.getOpcode() == PPC::SELECT_SPE4 ||
12182 MI.getOpcode() == PPC::SELECT_SPE ||
12183 MI.getOpcode() == PPC::SELECT_QFRC ||
12184 MI.getOpcode() == PPC::SELECT_QSRC ||
12185 MI.getOpcode() == PPC::SELECT_QBRC ||
12186 MI.getOpcode() == PPC::SELECT_VRRC ||
12187 MI.getOpcode() == PPC::SELECT_VSFRC ||
12188 MI.getOpcode() == PPC::SELECT_VSSRC ||
12189 MI.getOpcode() == PPC::SELECT_VSRC) {
12190 BuildMI(BB, dl, TII->get(PPC::BC))
12191 .addReg(MI.getOperand(1).getReg())
12192 .addMBB(sinkMBB);
12193 } else {
12194 unsigned SelectPred = MI.getOperand(4).getImm();
12195 BuildMI(BB, dl, TII->get(PPC::BCC))
12196 .addImm(SelectPred)
12197 .addReg(MI.getOperand(1).getReg())
12198 .addMBB(sinkMBB);
12199 }
12200
12201 // copy0MBB:
12202 // %FalseValue = ...
12203 // # fallthrough to sinkMBB
12204 BB = copy0MBB;
12205
12206 // Update machine-CFG edges
12207 BB->addSuccessor(sinkMBB);
12208
12209 // sinkMBB:
12210 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12211 // ...
12212 BB = sinkMBB;
12213 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12214 .addReg(MI.getOperand(3).getReg())
12215 .addMBB(copy0MBB)
12216 .addReg(MI.getOperand(2).getReg())
12217 .addMBB(thisMBB);
12218 } else if (MI.getOpcode() == PPC::ReadTB) {
12219 // To read the 64-bit time-base register on a 32-bit target, we read the
12220 // two halves. Should the counter have wrapped while it was being read, we
12221 // need to try again.
12222 // ...
12223 // readLoop:
12224 // mfspr Rx,TBU # load from TBU
12225 // mfspr Ry,TB # load from TB
12226 // mfspr Rz,TBU # load from TBU
12227 // cmpw crX,Rx,Rz # check if 'old'='new'
12228 // bne readLoop # branch if they're not equal
12229 // ...
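// In C terms, the loop is roughly (a sketch; mftbu/mftb stand for the
// mfspr reads of SPR 269 and SPR 268 emitted below):
//   do {
//     Hi = mftbu(); Lo = mftb(); Again = mftbu();
//   } while (Hi != Again);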
12230
12231 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12232 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12233 DebugLoc dl = MI.getDebugLoc();
12234 F->insert(It, readMBB);
12235 F->insert(It, sinkMBB);
12236
12237 // Transfer the remainder of BB and its successor edges to sinkMBB.
12238 sinkMBB->splice(sinkMBB->begin(), BB,
12239 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12240 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12241
12242 BB->addSuccessor(readMBB);
12243 BB = readMBB;
12244
12245 MachineRegisterInfo &RegInfo = F->getRegInfo();
12246 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12247 Register LoReg = MI.getOperand(0).getReg();
12248 Register HiReg = MI.getOperand(1).getReg();
12249
12250 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12251 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12252 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12253
12254 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12255
12256 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12257 .addReg(HiReg)
12258 .addReg(ReadAgainReg);
12259 BuildMI(BB, dl, TII->get(PPC::BCC))
12260 .addImm(PPC::PRED_NE)
12261 .addReg(CmpReg)
12262 .addMBB(readMBB);
12263
12264 BB->addSuccessor(readMBB);
12265 BB->addSuccessor(sinkMBB);
12266 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12267 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12268 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12269 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12270 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12271 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12272 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12273 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12274
12275 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12276 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12277 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12278 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12279 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12280 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12281 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12282 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12283
12284 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12285 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12286 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12287 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12288 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12289 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12290 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12291 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12292
12293 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12294 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12295 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12296 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12297 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12298 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12299 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12300 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12301
12302 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12303 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12304 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12305 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12306 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12307 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12308 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12309 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12310
12311 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12312 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12313 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12314 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12315 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12316 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12317 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12318 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12319
12320 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12321 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12322 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12323 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12324 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12325 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12326 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12327 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12328
12329 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12330 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12331 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12332 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12333 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12334 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12335 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12336 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12337
12338 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12339 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12340 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12341 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12342 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12343 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12344 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12345 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12346
12347 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12348 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12349 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12350 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12351 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12352 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12353 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12354 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12355
12356 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12357 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12358 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12359 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12360 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12361 BB = EmitAtomicBinary(MI, BB, 4, 0);
12362 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12363 BB = EmitAtomicBinary(MI, BB, 8, 0);
12364 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12365 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12366 (Subtarget.hasPartwordAtomics() &&
12367 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12368 (Subtarget.hasPartwordAtomics() &&
12369 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12370 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12371
12372 auto LoadMnemonic = PPC::LDARX;
12373 auto StoreMnemonic = PPC::STDCX;
12374 switch (MI.getOpcode()) {
12375 default:
12376 llvm_unreachable("Compare and swap of unknown size");
12377 case PPC::ATOMIC_CMP_SWAP_I8:
12378 LoadMnemonic = PPC::LBARX;
12379 StoreMnemonic = PPC::STBCX;
12380 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12381 break;
12382 case PPC::ATOMIC_CMP_SWAP_I16:
12383 LoadMnemonic = PPC::LHARX;
12384 StoreMnemonic = PPC::STHCX;
12385 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12386 break;
12387 case PPC::ATOMIC_CMP_SWAP_I32:
12388 LoadMnemonic = PPC::LWARX;
12389 StoreMnemonic = PPC::STWCX;
12390 break;
12391 case PPC::ATOMIC_CMP_SWAP_I64:
12392 LoadMnemonic = PPC::LDARX;
12393 StoreMnemonic = PPC::STDCX;
12394 break;
12395 }
12396 Register dest = MI.getOperand(0).getReg();
12397 Register ptrA = MI.getOperand(1).getReg();
12398 Register ptrB = MI.getOperand(2).getReg();
12399 Register oldval = MI.getOperand(3).getReg();
12400 Register newval = MI.getOperand(4).getReg();
12401 DebugLoc dl = MI.getDebugLoc();
12402
12403 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12404 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12405 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12406 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12407 F->insert(It, loop1MBB);
12408 F->insert(It, loop2MBB);
12409 F->insert(It, midMBB);
12410 F->insert(It, exitMBB);
12411 exitMBB->splice(exitMBB->begin(), BB,
12412 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12413 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12414
12415 // thisMBB:
12416 // ...
12417 // fallthrough --> loop1MBB
12418 BB->addSuccessor(loop1MBB);
12419
12420 // loop1MBB:
12421 // l[bhwd]arx dest, ptr
12422 // cmp[wd] dest, oldval
12423 // bne- midMBB
12424 // loop2MBB:
12425 // st[bhwd]cx. newval, ptr
12426 // bne- loop1MBB
12427 // b exitMBB
12428 // midMBB:
12429 // st[bhwd]cx. dest, ptr
12430 // exitMBB:
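// In other words, this is the usual load-reserved / store-conditional
// compare-and-swap loop; roughly, in C-like pseudocode:
//   do {
//     dest = load_reserved(ptr);
//     if (dest != oldval) { store_conditional(ptr, dest); break; }
//   } while (!store_conditional(ptr, newval));
// The store in midMBB exists only to clear the reservation before exiting.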
12431 BB = loop1MBB;
12432 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12433 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12434 .addReg(oldval)
12435 .addReg(dest);
12436 BuildMI(BB, dl, TII->get(PPC::BCC))
12437 .addImm(PPC::PRED_NE)
12438 .addReg(PPC::CR0)
12439 .addMBB(midMBB);
12440 BB->addSuccessor(loop2MBB);
12441 BB->addSuccessor(midMBB);
12442
12443 BB = loop2MBB;
12444 BuildMI(BB, dl, TII->get(StoreMnemonic))
12445 .addReg(newval)
12446 .addReg(ptrA)
12447 .addReg(ptrB);
12448 BuildMI(BB, dl, TII->get(PPC::BCC))
12449 .addImm(PPC::PRED_NE)
12450 .addReg(PPC::CR0)
12451 .addMBB(loop1MBB);
12452 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12453 BB->addSuccessor(loop1MBB);
12454 BB->addSuccessor(exitMBB);
12455
12456 BB = midMBB;
12457 BuildMI(BB, dl, TII->get(StoreMnemonic))
12458 .addReg(dest)
12459 .addReg(ptrA)
12460 .addReg(ptrB);
12461 BB->addSuccessor(exitMBB);
12462
12463 // exitMBB:
12464 // ...
12465 BB = exitMBB;
12466 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12467 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12468 // We must use 64-bit registers for addresses when targeting 64-bit,
12469 // since we're actually doing arithmetic on them. Other registers
12470 // can be 32-bit.
12471 bool is64bit = Subtarget.isPPC64();
12472 bool isLittleEndian = Subtarget.isLittleEndian();
12473 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12474
12475 Register dest = MI.getOperand(0).getReg();
12476 Register ptrA = MI.getOperand(1).getReg();
12477 Register ptrB = MI.getOperand(2).getReg();
12478 Register oldval = MI.getOperand(3).getReg();
12479 Register newval = MI.getOperand(4).getReg();
12480 DebugLoc dl = MI.getDebugLoc();
12481
12482 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12483 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12484 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12485 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12486 F->insert(It, loop1MBB);
12487 F->insert(It, loop2MBB);
12488 F->insert(It, midMBB);
12489 F->insert(It, exitMBB);
12490 exitMBB->splice(exitMBB->begin(), BB,
12491 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12492 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12493
12494 MachineRegisterInfo &RegInfo = F->getRegInfo();
12495 const TargetRegisterClass *RC =
12496 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12497 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12498
12499 Register PtrReg = RegInfo.createVirtualRegister(RC);
12500 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12501 Register ShiftReg =
12502 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12503 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12504 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12505 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12506 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12507 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12508 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12509 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12510 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12511 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12512 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12513 Register Ptr1Reg;
12514 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12515 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12516 // thisMBB:
12517 // ...
12518 // fallthrough --> loop1MBB
12519 BB->addSuccessor(loop1MBB);
12520
12521 // The 4-byte load must be aligned, while a char or short may be
12522 // anywhere in the word. Hence all this nasty bookkeeping code.
12523 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12524 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12525 // xori shift, shift1, 24 [16]
12526 // rlwinm ptr, ptr1, 0, 0, 29
12527 // slw newval2, newval, shift
12528 // slw oldval2, oldval, shift
12529 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12530 // slw mask, mask2, shift
12531 // and newval3, newval2, mask
12532 // and oldval3, oldval2, mask
12533 // loop1MBB:
12534 // lwarx tmpDest, ptr
12535 // and tmp, tmpDest, mask
12536 // cmpw tmp, oldval3
12537 // bne- midMBB
12538 // loop2MBB:
12539 // andc tmp2, tmpDest, mask
12540 // or tmp4, tmp2, newval3
12541 // stwcx. tmp4, ptr
12542 // bne- loop1MBB
12543 // b exitMBB
12544 // midMBB:
12545 // stwcx. tmpDest, ptr
12546 // exitMBB:
12547 // srw dest, tmpDest, shift
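// As a worked example of the bookkeeping above (big-endian, 8-bit case):
// for a byte at address ...6, rlwinm produces shift1 = (6 << 3) & 0x18 = 16,
// the xori with 24 yields shift = 8, and mask = 255 << 8 then selects
// exactly that byte within the aligned word loaded by lwarx.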
12548 if (ptrA != ZeroReg) {
12549 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12550 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12551 .addReg(ptrA)
12552 .addReg(ptrB);
12553 } else {
12554 Ptr1Reg = ptrB;
12555 }
12556
12557 // We need to use a 32-bit subregister here to avoid a register class
12558 // mismatch in 64-bit mode.
12559 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12560 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12561 .addImm(3)
12562 .addImm(27)
12563 .addImm(is8bit ? 28 : 27);
12564 if (!isLittleEndian)
12565 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12566 .addReg(Shift1Reg)
12567 .addImm(is8bit ? 24 : 16);
12568 if (is64bit)
12569 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12570 .addReg(Ptr1Reg)
12571 .addImm(0)
12572 .addImm(61);
12573 else
12574 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12575 .addReg(Ptr1Reg)
12576 .addImm(0)
12577 .addImm(0)
12578 .addImm(29);
12579 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12580 .addReg(newval)
12581 .addReg(ShiftReg);
12582 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12583 .addReg(oldval)
12584 .addReg(ShiftReg);
12585 if (is8bit)
12586 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12587 else {
12588 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12589 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12590 .addReg(Mask3Reg)
12591 .addImm(65535);
12592 }
12593 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12594 .addReg(Mask2Reg)
12595 .addReg(ShiftReg);
12596 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12597 .addReg(NewVal2Reg)
12598 .addReg(MaskReg);
12599 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12600 .addReg(OldVal2Reg)
12601 .addReg(MaskReg);
12602
12603 BB = loop1MBB;
12604 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12605 .addReg(ZeroReg)
12606 .addReg(PtrReg);
12607 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12608 .addReg(TmpDestReg)
12609 .addReg(MaskReg);
12610 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12611 .addReg(TmpReg)
12612 .addReg(OldVal3Reg);
12613 BuildMI(BB, dl, TII->get(PPC::BCC))
12614 .addImm(PPC::PRED_NE)
12615 .addReg(PPC::CR0)
12616 .addMBB(midMBB);
12617 BB->addSuccessor(loop2MBB);
12618 BB->addSuccessor(midMBB);
12619
12620 BB = loop2MBB;
12621 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12622 .addReg(TmpDestReg)
12623 .addReg(MaskReg);
12624 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12625 .addReg(Tmp2Reg)
12626 .addReg(NewVal3Reg);
12627 BuildMI(BB, dl, TII->get(PPC::STWCX))
12628 .addReg(Tmp4Reg)
12629 .addReg(ZeroReg)
12630 .addReg(PtrReg);
12631 BuildMI(BB, dl, TII->get(PPC::BCC))
12632 .addImm(PPC::PRED_NE)
12633 .addReg(PPC::CR0)
12634 .addMBB(loop1MBB);
12635 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12636 BB->addSuccessor(loop1MBB);
12637 BB->addSuccessor(exitMBB);
12638
12639 BB = midMBB;
12640 BuildMI(BB, dl, TII->get(PPC::STWCX))
12641 .addReg(TmpDestReg)
12642 .addReg(ZeroReg)
12643 .addReg(PtrReg);
12644 BB->addSuccessor(exitMBB);
12645
12646 // exitMBB:
12647 // ...
12648 BB = exitMBB;
12649 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12650 .addReg(TmpReg)
12651 .addReg(ShiftReg);
12652 } else if (MI.getOpcode() == PPC::FADDrtz) {
12653 // This pseudo performs an FADD with rounding mode temporarily forced
12654 // to round-to-zero. We emit this via custom inserter since the FPSCR
12655 // is not modeled at the SelectionDAG level.
12656 Register Dest = MI.getOperand(0).getReg();
12657 Register Src1 = MI.getOperand(1).getReg();
12658 Register Src2 = MI.getOperand(2).getReg();
12659 DebugLoc dl = MI.getDebugLoc();
12660
12661 MachineRegisterInfo &RegInfo = F->getRegInfo();
12662 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12663
12664 // Save FPSCR value.
12665 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12666
12667 // Set rounding mode to round-to-zero.
12668 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
12669 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
12670
12671 // Perform addition.
12672 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
12673
12674 // Restore FPSCR value.
12675 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12676 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12677 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12678 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12679 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12680 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12681 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12682 ? PPC::ANDI8_rec
12683 : PPC::ANDI_rec;
12684 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12685 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12686
12687 MachineRegisterInfo &RegInfo = F->getRegInfo();
12688 Register Dest = RegInfo.createVirtualRegister(
12689 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12690
12691 DebugLoc Dl = MI.getDebugLoc();
12692 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12693 .addReg(MI.getOperand(1).getReg())
12694 .addImm(1);
12695 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12696 MI.getOperand(0).getReg())
12697 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12698 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12699 DebugLoc Dl = MI.getDebugLoc();
12700 MachineRegisterInfo &RegInfo = F->getRegInfo();
12701 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12702 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12703 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12704 MI.getOperand(0).getReg())
12705 .addReg(CRReg);
12706 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12707 DebugLoc Dl = MI.getDebugLoc();
12708 unsigned Imm = MI.getOperand(1).getImm();
12709 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12710 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12711 MI.getOperand(0).getReg())
12712 .addReg(PPC::CR0EQ);
12713 } else if (MI.getOpcode() == PPC::SETRNDi) {
12714 DebugLoc dl = MI.getDebugLoc();
12715 Register OldFPSCRReg = MI.getOperand(0).getReg();
12716
12717 // Save FPSCR value.
12718 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12719
12720 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
12721 // the following settings:
12722 // 00 Round to nearest
12723 // 01 Round to 0
12724 // 10 Round to +inf
12725 // 11 Round to -inf
12726
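// For illustration, decoding the RN field from a saved FPSCR image (held
// here as a hypothetical 64-bit integer FPSCRVal) would be:
//   unsigned RN = FPSCRVal & 3; // 0 = nearest, 1 = zero, 2 = +inf, 3 = -inf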
12727 // When the operand is an immediate, use its two least significant bits to
12728 // set bits 62:63 of the FPSCR.
12729 unsigned Mode = MI.getOperand(1).getImm();
12730 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12731 .addImm(31);
12732
12733 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12734 .addImm(30);
12735 } else if (MI.getOpcode() == PPC::SETRND) {
12736 DebugLoc dl = MI.getDebugLoc();
12737
12738 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12739 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12740 // If the target doesn't have DirectMove, we should use the stack to do the
12741 // conversion, because the target doesn't have instructions like mtvsrd
12742 // or mfvsrd to do this conversion directly.
12743 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12744 if (Subtarget.hasDirectMove()) {
12745 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12746 .addReg(SrcReg);
12747 } else {
12748 // Use the stack to do the register copy.
12749 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12750 MachineRegisterInfo &RegInfo = F->getRegInfo();
12751 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12752 if (RC == &PPC::F8RCRegClass) {
12753 // Copy register from F8RCRegClass to G8RCRegClass.
12754 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12755 "Unsupported RegClass.");
12756
12757 StoreOp = PPC::STFD;
12758 LoadOp = PPC::LD;
12759 } else {
12760 // Copy register from G8RCRegClass to F8RCRegClass.
12761 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12762 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12763 "Unsupported RegClass.");
12764 }
12765
12766 MachineFrameInfo &MFI = F->getFrameInfo();
12767 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12768
12769 MachineMemOperand *MMOStore = F->getMachineMemOperand(
12770 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12771 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12772 MFI.getObjectAlign(FrameIdx));
12773
12774 // Store the SrcReg into the stack.
12775 BuildMI(*BB, MI, dl, TII->get(StoreOp))
12776 .addReg(SrcReg)
12777 .addImm(0)
12778 .addFrameIndex(FrameIdx)
12779 .addMemOperand(MMOStore);
12780
12781 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12782 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12783 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12784 MFI.getObjectAlign(FrameIdx));
12785
12786 // Load from the stack where SrcReg is stored, and save to DestReg,
12787 // so we have done the RegClass conversion from RegClass::SrcReg to
12788 // RegClass::DestReg.
12789 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12790 .addImm(0)
12791 .addFrameIndex(FrameIdx)
12792 .addMemOperand(MMOLoad);
12793 }
12794 };
12795
12796 Register OldFPSCRReg = MI.getOperand(0).getReg();
12797
12798 // Save FPSCR value.
12799 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12800
12801 // When the operand is a gprc register, use its two least significant bits
12802 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
12803 //
12804 // copy OldFPSCRTmpReg, OldFPSCRReg
12805 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12806 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12807 // copy NewFPSCRReg, NewFPSCRTmpReg
12808 // mtfsf 255, NewFPSCRReg
12809 MachineOperand SrcOp = MI.getOperand(1);
12810 MachineRegisterInfo &RegInfo = F->getRegInfo();
12811 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12812
12813 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), OldFPSCRTmpReg)
12814 .addReg(OldFPSCRReg);
12815 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12816 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12817
12818 // The first operand of INSERT_SUBREG should be a register that has
12819 // subregisters; since we only care about its register class, it can be
12820 // an IMPLICIT_DEF register.
12821 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12822 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12823 .addReg(ImDefReg)
12824 .add(SrcOp)
12825 .addImm(1);
12826
12827 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12828 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12829 .addReg(OldFPSCRTmpReg)
12830 .addReg(ExtSrcReg)
12831 .addImm(0)
12832 .addImm(62);
12833
12834 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12835 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), NewFPSCRReg)
12836 .addReg(NewFPSCRTmpReg);
12837 // The mask value of 255 means that bits 32:63 of NewFPSCRReg are placed
12838 // into bits 32:63 of the FPSCR.
12839 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12840 .addImm(255)
12841 .addReg(NewFPSCRReg)
12842 .addImm(0)
12843 .addImm(0);
12844 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12845 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12846 return emitProbedAlloca(MI, BB);
12847 } else {
12848 llvm_unreachable("Unexpected instr type to insert");
12849 }
12850
12851 MI.eraseFromParent(); // The pseudo instruction is gone now.
12852 return BB;
12853}
12854
12855//===----------------------------------------------------------------------===//
12856// Target Optimization Hooks
12857//===----------------------------------------------------------------------===//
12858
12859static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12860 // For the estimates, convergence is quadratic, so we essentially double the
12861 // number of digits correct after every iteration. For both FRE and FRSQRTE,
12862 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12863 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
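// Worked example of the doubling: without hasRecipPrec() the 2^-5 seed
// refines as 2^-10 -> 2^-20 -> 2^-40 after three steps, covering f32's
// 23 fraction bits; the extra step taken for f64 reaches about 2^-80,
// covering its 52 fraction bits.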
12864 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12865 if (VT.getScalarType() == MVT::f64)
12866 RefinementSteps++;
12867 return RefinementSteps;
12868}
12869
12870SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12871 int Enabled, int &RefinementSteps,
12872 bool &UseOneConstNR,
12873 bool Reciprocal) const {
12874 EVT VT = Operand.getValueType();
12875 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12876 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12877 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12878 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
12879 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
12880 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
12881 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12882 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12883
12884 // The Newton-Raphson computation with a single constant does not provide
12885 // enough accuracy on some CPUs.
12886 UseOneConstNR = !Subtarget.needsTwoConstNR();
12887 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12888 }
12889 return SDValue();
12890}
12891
12892SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12893 int Enabled,
12894 int &RefinementSteps) const {
12895 EVT VT = Operand.getValueType();
12896 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12897 (VT == MVT::f64 && Subtarget.hasFRE()) ||
12898 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12899 (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
12900 (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
12901 (VT == MVT::v4f64 && Subtarget.hasQPX())) {
12902 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12903 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12904 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12905 }
12906 return SDValue();
12907}
12908
12909unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12910 // Note: This functionality is used only when unsafe-fp-math is enabled, and
12911 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12912 // enabled for division), this functionality is redundant with the default
12913 // combiner logic (once the division -> reciprocal/multiply transformation
12914 // has taken place). As a result, this matters more for older cores than for
12915 // newer ones.
12916
12917 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12918 // reciprocal if there are two or more FDIVs (for embedded cores with only
12919 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
12920 switch (Subtarget.getCPUDirective()) {
12921 default:
12922 return 3;
12923 case PPC::DIR_440:
12924 case PPC::DIR_A2:
12925 case PPC::DIR_E500:
12926 case PPC::DIR_E500mc:
12927 case PPC::DIR_E5500:
12928 return 2;
12929 }
12930}
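// For illustration, the combine this threshold gates rewrites (in IR terms)
//   %a = fdiv fast float %x, %d
//   %b = fdiv fast float %y, %d
// into a single reciprocal and two multiplies once enough divisions by %d
// are seen:
//   %r = fdiv fast float 1.0, %d
//   %a = fmul fast float %x, %r
//   %b = fmul fast float %y, %r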
12931
12932// isConsecutiveLSLoc needs to work even if all adds have not yet been
12933// collapsed, and so we need to look through chains of them.
12934 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12935 int64_t& Offset, SelectionDAG &DAG) {
12936 if (DAG.isBaseWithConstantOffset(Loc)) {
12937 Base = Loc.getOperand(0);
12938 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12939
12940 // The base might itself be a base plus an offset, and if so, accumulate
12941 // that as well.
12942 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12943 }
12944}
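// For example, given Loc = (add (add FI, 16), 32), the recursion above ends
// with Base = FI and accumulates Offset += 48.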
12945
12946static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12947 unsigned Bytes, int Dist,
12948 SelectionDAG &DAG) {
12949 if (VT.getSizeInBits() / 8 != Bytes)
12950 return false;
12951
12952 SDValue BaseLoc = Base->getBasePtr();
12953 if (Loc.getOpcode() == ISD::FrameIndex) {
12954 if (BaseLoc.getOpcode() != ISD::FrameIndex)
12955 return false;
12956 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12957 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12958 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12959 int FS = MFI.getObjectSize(FI);
12960 int BFS = MFI.getObjectSize(BFI);
12961 if (FS != BFS || FS != (int)Bytes) return false;
12962 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12963 }
12964
12965 SDValue Base1 = Loc, Base2 = BaseLoc;
12966 int64_t Offset1 = 0, Offset2 = 0;
12967 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12968 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12969 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12970 return true;
12971
12972 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12973 const GlobalValue *GV1 = nullptr;
12974 const GlobalValue *GV2 = nullptr;
12975 Offset1 = 0;
12976 Offset2 = 0;
12977 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12978 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12979 if (isGA1 && isGA2 && GV1 == GV2)
12980 return Offset1 == (Offset2 + Dist*Bytes);
12981 return false;
12982}
12983
12984// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12985// not enforce equality of the chain operands.
12986 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12987 unsigned Bytes, int Dist,
12988 SelectionDAG &DAG) {
12989 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12990 EVT VT = LS->getMemoryVT();
12991 SDValue Loc = LS->getBasePtr();
12992 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12993 }
12994
12995 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12996 EVT VT;
12997 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12998 default: return false;
12999 case Intrinsic::ppc_qpx_qvlfd:
13000 case Intrinsic::ppc_qpx_qvlfda:
13001 VT = MVT::v4f64;
13002 break;
13003 case Intrinsic::ppc_qpx_qvlfs:
13004 case Intrinsic::ppc_qpx_qvlfsa:
13005 VT = MVT::v4f32;
13006 break;
13007 case Intrinsic::ppc_qpx_qvlfcd:
13008 case Intrinsic::ppc_qpx_qvlfcda:
13009 VT = MVT::v2f64;
13010 break;
13011 case Intrinsic::ppc_qpx_qvlfcs:
13012 case Intrinsic::ppc_qpx_qvlfcsa:
13013 VT = MVT::v2f32;
13014 break;
13015 case Intrinsic::ppc_qpx_qvlfiwa:
13016 case Intrinsic::ppc_qpx_qvlfiwz:
13017 case Intrinsic::ppc_altivec_lvx:
13018 case Intrinsic::ppc_altivec_lvxl:
13019 case Intrinsic::ppc_vsx_lxvw4x:
13020 case Intrinsic::ppc_vsx_lxvw4x_be:
13021 VT = MVT::v4i32;
13022 break;
13023 case Intrinsic::ppc_vsx_lxvd2x:
13024 case Intrinsic::ppc_vsx_lxvd2x_be:
13025 VT = MVT::v2f64;
13026 break;
13027 case Intrinsic::ppc_altivec_lvebx:
13028 VT = MVT::i8;
13029 break;
13030 case Intrinsic::ppc_altivec_lvehx:
13031 VT = MVT::i16;
13032 break;
13033 case Intrinsic::ppc_altivec_lvewx:
13034 VT = MVT::i32;
13035 break;
13036 }
13037
13038 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13039 }
13040
13041 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13042 EVT VT;
13043 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13044 default: return false;
13045 case Intrinsic::ppc_qpx_qvstfd:
13046 case Intrinsic::ppc_qpx_qvstfda:
13047 VT = MVT::v4f64;
13048 break;
13049 case Intrinsic::ppc_qpx_qvstfs:
13050 case Intrinsic::ppc_qpx_qvstfsa:
13051 VT = MVT::v4f32;
13052 break;
13053 case Intrinsic::ppc_qpx_qvstfcd:
13054 case Intrinsic::ppc_qpx_qvstfcda:
13055 VT = MVT::v2f64;
13056 break;
13057 case Intrinsic::ppc_qpx_qvstfcs:
13058 case Intrinsic::ppc_qpx_qvstfcsa:
13059 VT = MVT::v2f32;
13060 break;
13061 case Intrinsic::ppc_qpx_qvstfiw:
13062 case Intrinsic::ppc_qpx_qvstfiwa:
13063 case Intrinsic::ppc_altivec_stvx:
13064 case Intrinsic::ppc_altivec_stvxl:
13065 case Intrinsic::ppc_vsx_stxvw4x:
13066 VT = MVT::v4i32;
13067 break;
13068 case Intrinsic::ppc_vsx_stxvd2x:
13069 VT = MVT::v2f64;
13070 break;
13071 case Intrinsic::ppc_vsx_stxvw4x_be:
13072 VT = MVT::v4i32;
13073 break;
13074 case Intrinsic::ppc_vsx_stxvd2x_be:
13075 VT = MVT::v2f64;
13076 break;
13077 case Intrinsic::ppc_altivec_stvebx:
13078 VT = MVT::i8;
13079 break;
13080 case Intrinsic::ppc_altivec_stvehx:
13081 VT = MVT::i16;
13082 break;
13083 case Intrinsic::ppc_altivec_stvewx:
13084 VT = MVT::i32;
13085 break;
13086 }
13087
13088 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13089 }
13090
13091 return false;
13092}
13093
13094 // Return true if there is a nearby consecutive load to the one provided
13095 // (regardless of alignment). We search up and down the chain, looking through
13096// token factors and other loads (but nothing else). As a result, a true result
13097// indicates that it is safe to create a new consecutive load adjacent to the
13098// load provided.
13099 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13100 SDValue Chain = LD->getChain();
13101 EVT VT = LD->getMemoryVT();
13102
13103 SmallSet<SDNode *, 16> LoadRoots;
13104 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13105 SmallSet<SDNode *, 16> Visited;
13106
13107 // First, search up the chain, branching to follow all token-factor operands.
13108 // If we find a consecutive load, then we're done, otherwise, record all
13109 // nodes just above the top-level loads and token factors.
13110 while (!Queue.empty()) {
13111 SDNode *ChainNext = Queue.pop_back_val();
13112 if (!Visited.insert(ChainNext).second)
13113 continue;
13114
13115 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
13116 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13117 return true;
13118
13119 if (!Visited.count(ChainLD->getChain().getNode()))
13120 Queue.push_back(ChainLD->getChain().getNode());
13121 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13122 for (const SDUse &O : ChainNext->ops())
13123 if (!Visited.count(O.getNode()))
13124 Queue.push_back(O.getNode());
13125 } else
13126 LoadRoots.insert(ChainNext);
13127 }
13128
13129 // Second, search down the chain, starting from the top-level nodes recorded
13130 // in the first phase. These top-level nodes are the nodes just above all
13131 // loads and token factors. Starting with their uses, recursively look through
13132 // all loads (just the chain uses) and token factors to find a consecutive
13133 // load.
13134 Visited.clear();
13135 Queue.clear();
13136
13137 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13138 IE = LoadRoots.end(); I != IE; ++I) {
13139 Queue.push_back(*I);
13140
13141 while (!Queue.empty()) {
13142 SDNode *LoadRoot = Queue.pop_back_val();
13143 if (!Visited.insert(LoadRoot).second)
13144 continue;
13145
13146 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13147 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13148 return true;
13149
13150 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
13151 UE = LoadRoot->use_end(); UI != UE; ++UI)
13152 if (((isa<MemSDNode>(*UI) &&
13153 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
13154 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
13155 Queue.push_back(*UI);
13156 }
13157 }
13158
13159 return false;
13160}
13161
13162/// This function is called when we have proved that a SETCC node can be replaced
13163/// by subtraction (and other supporting instructions) so that the result of
13164/// comparison is kept in a GPR instead of CR. This function is purely for
13165/// codegen purposes and has some flags to guide the codegen process.
13166 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13167 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13168 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13169
13170 // Zero extend the operands to the largest legal integer. Originally, they
13171 // must be of a strictly smaller size.
13172 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13173 DAG.getConstant(Size, DL, MVT::i32));
13174 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13175 DAG.getConstant(Size, DL, MVT::i32));
13176
13177 // Swap if needed. Depends on the condition code.
13178 if (Swap)
13179 std::swap(Op0, Op1);
13180
13181 // Subtract extended integers.
13182 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13183
13184 // Move the sign bit to the least significant position and zero out the rest.
13185 // Now the least significant bit carries the result of original comparison.
13186 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13187 DAG.getConstant(Size - 1, DL, MVT::i32));
13188 auto Final = Shifted;
13189
13190 // Complement the result if needed. Based on the condition code.
13191 if (Complement)
13192 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13193 DAG.getConstant(1, DL, MVT::i64));
13194
13195 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13196}
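// Worked example: for (setult i32 %a, %b), with i64 the largest legal type,
// the sequence computes ((zext %a) - (zext %b)) >> 63; the shifted-down sign
// bit is exactly the unsigned borrow, i.e. 1 iff %a < %b. SETUGT swaps the
// operands first, SETUGE complements the result, and SETULE does both.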
13197
13198SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13199 DAGCombinerInfo &DCI) const {
13200 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13201
13202 SelectionDAG &DAG = DCI.DAG;
13203 SDLoc DL(N);
13204
13205 // The size of the integers being compared has a critical role in the
13206 // following analysis, so we prefer to do this when all types are legal.
13207 if (!DCI.isAfterLegalizeDAG())
13208 return SDValue();
13209
13210 // If all users of SETCC extend its value to a legal integer type
13211 // then we replace SETCC with a subtraction
13212 for (SDNode::use_iterator UI = N->use_begin(),
13213 UE = N->use_end(); UI != UE; ++UI) {
13214 if (UI->getOpcode() != ISD::ZERO_EXTEND)
13215 return SDValue();
13216 }
13217
13218 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13219 auto OpSize = N->getOperand(0).getValueSizeInBits();
13220
13221 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13222
13223 if (OpSize < Size) {
13224 switch (CC) {
13225 default: break;
13226 case ISD::SETULT:
13227 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13228 case ISD::SETULE:
13229 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13230 case ISD::SETUGT:
13231 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13232 case ISD::SETUGE:
13233 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13234 }
13235 }
13236
13237 return SDValue();
13238}
13239
13240SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13241 DAGCombinerInfo &DCI) const {
13242 SelectionDAG &DAG = DCI.DAG;
13243 SDLoc dl(N);
13244
13245 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13246 // If we're tracking CR bits, we need to be careful that we don't have:
13247 // trunc(binary-ops(zext(x), zext(y)))
13248 // or
13249 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13250 // such that we're unnecessarily moving things into GPRs when it would be
13251 // better to keep them in CR bits.
13252
13253 // Note that trunc here can be an actual i1 trunc, or can be the effective
13254 // truncation that comes from a setcc or select_cc.
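// For example (an illustrative IR fragment):
//   %zx = zext i1 %x to i32
//   %zy = zext i1 %y to i32
//   %o = or i32 %zx, %zy
//   %t = trunc i32 %o to i1
// can be folded to (or i1 %x, %y) and stay in CR bits throughout.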
13255 if (N->getOpcode() == ISD::TRUNCATE &&
13256 N->getValueType(0) != MVT::i1)
13257 return SDValue();
13258
13259 if (N->getOperand(0).getValueType() != MVT::i32 &&
13260 N->getOperand(0).getValueType() != MVT::i64)
13261 return SDValue();
13262
13263 if (N->getOpcode() == ISD::SETCC ||
13264 N->getOpcode() == ISD::SELECT_CC) {
13265 // If we're looking at a comparison, then we need to make sure that the
13266 // high bits (all except for the first) don't affect the result.
13267 ISD::CondCode CC =
13268 cast<CondCodeSDNode>(N->getOperand(
13269 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13270 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13271
13272 if (ISD::isSignedIntSetCC(CC)) {
13273 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13274 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13275 return SDValue();
13276 } else if (ISD::isUnsignedIntSetCC(CC)) {
13277 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13278 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13279 !DAG.MaskedValueIsZero(N->getOperand(1),
13280 APInt::getHighBitsSet(OpBits, OpBits-1)))
13281 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13282 : SDValue());
13283 } else {
13284 // This is neither a signed nor an unsigned comparison, just make sure
13285 // that the high bits are equal.
13286 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13287 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13288
13289 // We don't really care about what is known about the first bit (if
13290 // anything), so clear it in all masks prior to comparing them.
13291 Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13292 Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13293
13294 if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13295 return SDValue();
13296 }
13297 }
13298
13299 // We now know that the higher-order bits are irrelevant, we just need to
13300 // make sure that all of the intermediate operations are bit operations, and
13301 // all inputs are extensions.
13302 if (N->getOperand(0).getOpcode() != ISD::AND &&
13303 N->getOperand(0).getOpcode() != ISD::OR &&
13304 N->getOperand(0).getOpcode() != ISD::XOR &&
13305 N->getOperand(0).getOpcode() != ISD::SELECT &&
13306 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13307 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13308 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13309 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13310 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13311 return SDValue();
13312
13313 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13314 N->getOperand(1).getOpcode() != ISD::AND &&
13315 N->getOperand(1).getOpcode() != ISD::OR &&
13316 N->getOperand(1).getOpcode() != ISD::XOR &&
13317 N->getOperand(1).getOpcode() != ISD::SELECT &&
13318 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13319 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13320 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13321 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13322 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13323 return SDValue();
13324
13324
13325 SmallVector<SDValue, 4> Inputs;
13326 SmallVector<SDValue, 8> BinOps, PromOps;
13327 SmallPtrSet<SDNode *, 16> Visited;
13328
13329 for (unsigned i = 0; i < 2; ++i) {
13330 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13331 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13332 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13333 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13334 isa<ConstantSDNode>(N->getOperand(i)))
13335 Inputs.push_back(N->getOperand(i));
13336 else
13337 BinOps.push_back(N->getOperand(i));
13338
13339 if (N->getOpcode() == ISD::TRUNCATE)
13340 break;
13341 }
13342
13343 // Visit all inputs, collect all binary operations (and, or, xor and
13344 // select) that are all fed by extensions.
13345 while (!BinOps.empty()) {
13346 SDValue BinOp = BinOps.back();
13347 BinOps.pop_back();
13348
13349 if (!Visited.insert(BinOp.getNode()).second)
13350 continue;
13351
13352 PromOps.push_back(BinOp);
13353
13354 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13355 // The condition of the select is not promoted.
13356 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13357 continue;
13358 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13359 continue;
13360
13361 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13362 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13363 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13364 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13365 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13366 Inputs.push_back(BinOp.getOperand(i));
13367 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13368 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13369 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13370 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13371 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13372 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13373 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13374 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13375 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13376 BinOps.push_back(BinOp.getOperand(i));
13377 } else {
13378 // We have an input that is not an extension or another binary
13379 // operation; we'll abort this transformation.
13380 return SDValue();
13381 }
13382 }
13383 }
13384
13385 // Make sure that this is a self-contained cluster of operations (which
13386 // is not quite the same thing as saying that everything has only one
13387 // use).
13388 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13389 if (isa<ConstantSDNode>(Inputs[i]))
13390 continue;
13391
13392 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13393 UE = Inputs[i].getNode()->use_end();
13394 UI != UE; ++UI) {
13395 SDNode *User = *UI;
13396 if (User != N && !Visited.count(User))
13397 return SDValue();
13398
13399 // Make sure that we're not going to promote the non-output-value
13400 // operand(s) or SELECT or SELECT_CC.
13401 // FIXME: Although we could sometimes handle this, and it does occur in
13402 // practice that one of the condition inputs to the select is also one of
13403 // the outputs, we currently can't deal with this.
13404 if (User->getOpcode() == ISD::SELECT) {
13405 if (User->getOperand(0) == Inputs[i])
13406 return SDValue();
13407 } else if (User->getOpcode() == ISD::SELECT_CC) {
13408 if (User->getOperand(0) == Inputs[i] ||
13409 User->getOperand(1) == Inputs[i])
13410 return SDValue();
13411 }
13412 }
13413 }
13414
13415 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13416 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13417 UE = PromOps[i].getNode()->use_end();
13418 UI != UE; ++UI) {
13419 SDNode *User = *UI;
13420 if (User != N && !Visited.count(User))
13421 return SDValue();
13422
13423 // Make sure that we're not going to promote the non-output-value
13424 // operand(s) or SELECT or SELECT_CC.
13425 // FIXME: Although we could sometimes handle this, and it does occur in
13426 // practice that one of the condition inputs to the select is also one of
13427 // the outputs, we currently can't deal with this.
13428 if (User->getOpcode() == ISD::SELECT) {
13429 if (User->getOperand(0) == PromOps[i])
13430 return SDValue();
13431 } else if (User->getOpcode() == ISD::SELECT_CC) {
13432 if (User->getOperand(0) == PromOps[i] ||
13433 User->getOperand(1) == PromOps[i])
13434 return SDValue();
13435 }
13436 }
13437 }
13438
13439 // Replace all inputs with the extension operand.
13440 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13441 // Constants may have users outside the cluster of to-be-promoted nodes,
13442 // and so we need to replace those as we do the promotions.
13443 if (isa<ConstantSDNode>(Inputs[i]))
13444 continue;
13445 else
13446 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13447 }
13448
13449 std::list<HandleSDNode> PromOpHandles;
13450 for (auto &PromOp : PromOps)
13451 PromOpHandles.emplace_back(PromOp);
13452
13453 // Replace all operations (these are all the same, but have a different
13454 // (i1) return type). DAG.getNode will validate that the types of
13455 // a binary operator match, so go through the list in reverse so that
13456 // we've likely promoted both operands first. Any intermediate truncations or
13457 // extensions disappear.
13458 while (!PromOpHandles.empty()) {
13459 SDValue PromOp = PromOpHandles.back().getValue();
13460 PromOpHandles.pop_back();
13461
13462 if (PromOp.getOpcode() == ISD::TRUNCATE ||
13463 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13464 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13465 PromOp.getOpcode() == ISD::ANY_EXTEND) {
13466 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13467 PromOp.getOperand(0).getValueType() != MVT::i1) {
13468 // The operand is not yet ready (see comment below).
13469 PromOpHandles.emplace_front(PromOp);
13470 continue;
13471 }
13472
13473 SDValue RepValue = PromOp.getOperand(0);
13474 if (isa<ConstantSDNode>(RepValue))
13475 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13476
13477 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13478 continue;
13479 }
13480
13481 unsigned C;
13482 switch (PromOp.getOpcode()) {
13483 default: C = 0; break;
13484 case ISD::SELECT: C = 1; break;
13485 case ISD::SELECT_CC: C = 2; break;
13486 }
13487
13488 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13489 PromOp.getOperand(C).getValueType() != MVT::i1) ||
13490 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13491 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13492 // The to-be-promoted operands of this node have not yet been
13493 // promoted (this should be rare because we're going through the
13494 // list backward, but if one of the operands has several users in
13495 // this cluster of to-be-promoted nodes, it is possible).
13496 PromOpHandles.emplace_front(PromOp);
13497 continue;
13498 }
13499
13500 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13501 PromOp.getNode()->op_end());
13502
13503 // If there are any constant inputs, make sure they're replaced now.
13504 for (unsigned i = 0; i < 2; ++i)
13505 if (isa<ConstantSDNode>(Ops[C+i]))
13506 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13507
13509 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13510 }
13511
13512 // Now we're left with the initial truncation itself.
13513 if (N->getOpcode() == ISD::TRUNCATE)
13514 return N->getOperand(0);
13515
13516 // Otherwise, this is a comparison. The operands to be compared have just
13517 // changed type (to i1), but everything else is the same.
13518 return SDValue(N, 0);
13519}
13520
13521SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13522 DAGCombinerInfo &DCI) const {
13523 SelectionDAG &DAG = DCI.DAG;
13524 SDLoc dl(N);
13525
13526 // If we're tracking CR bits, we need to be careful that we don't have:
13527 // zext(binary-ops(trunc(x), trunc(y)))
13528 // or
13529 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13530 // such that we're unnecessarily moving things into CR bits that can more
13531 // efficiently stay in GPRs. Note that if we're not certain that the high
13532 // bits are set as required by the final extension, we still may need to do
13533 // some masking to get the proper behavior.
13534
13535 // This same functionality is important on PPC64 when dealing with
13536 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13537 // the return values of functions. Because it is so similar, it is handled
13538 // here as well.
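// For example (an illustrative IR fragment):
//   %tx = trunc i64 %x to i32
//   %ty = trunc i64 %y to i32
//   %a = and i32 %tx, %ty
//   %z = zext i32 %a to i64
// is better done as (and i64 %x, %y), with masking afterwards only if the
// high bits are not already known to be zero.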
13539
13540 if (N->getValueType(0) != MVT::i32 &&
13541 N->getValueType(0) != MVT::i64)
13542 return SDValue();
13543
13544 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13545 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13546 return SDValue();
13547
13548 if (N->getOperand(0).getOpcode() != ISD::AND &&
13549 N->getOperand(0).getOpcode() != ISD::OR &&
13550 N->getOperand(0).getOpcode() != ISD::XOR &&
13551 N->getOperand(0).getOpcode() != ISD::SELECT &&
13552 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13553 return SDValue();
13554
13555 SmallVector<SDValue, 4> Inputs;
13556 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13557 SmallPtrSet<SDNode *, 16> Visited;
13558
13559 // Visit all inputs, collect all binary operations (and, or, xor and
13560 // select) that are all fed by truncations.
13561 while (!BinOps.empty()) {
13562 SDValue BinOp = BinOps.back();
13563 BinOps.pop_back();
13564
13565 if (!Visited.insert(BinOp.getNode()).second)
13566 continue;
13567
13568 PromOps.push_back(BinOp);
13569
13570 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13571 // The condition of the select is not promoted.
13572 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13573 continue;
13574 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13575 continue;
13576
13577 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13578 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13579 Inputs.push_back(BinOp.getOperand(i));
13580 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13581 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13582 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13583 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13584 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13585 BinOps.push_back(BinOp.getOperand(i));
13586 } else {
13587 // We have an input that is not a truncation or another binary
13588 // operation; we'll abort this transformation.
13589 return SDValue();
13590 }
13591 }
13592 }
13593
13594 // The operands of a select that must be truncated when the select is
13595 // promoted because the operand is actually part of the to-be-promoted set.
13596 DenseMap<SDNode *, EVT> SelectTruncOp[2];
13597
13598 // Make sure that this is a self-contained cluster of operations (which
13599 // is not quite the same thing as saying that everything has only one
13600 // use).
13601 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13602 if (isa<ConstantSDNode>(Inputs[i]))
13603 continue;
13604
13605 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13606 UE = Inputs[i].getNode()->use_end();
13607 UI != UE; ++UI) {
13608 SDNode *User = *UI;
13609 if (User != N && !Visited.count(User))
13610 return SDValue();
13611
13612 // If we're going to promote the non-output-value operand(s) or SELECT or
13613 // SELECT_CC, record them for truncation.
13614 if (User->getOpcode() == ISD::SELECT) {
13615 if (User->getOperand(0) == Inputs[i])
13616 SelectTruncOp[0].insert(std::make_pair(User,
13617 User->getOperand(0).getValueType()));
13618 } else if (User->getOpcode() == ISD::SELECT_CC) {
13619 if (User->getOperand(0) == Inputs[i])
13620 SelectTruncOp[0].insert(std::make_pair(User,
13621 User->getOperand(0).getValueType()));
13622 if (User->getOperand(1) == Inputs[i])
13623 SelectTruncOp[1].insert(std::make_pair(User,
13624 User->getOperand(1).getValueType()));
13625 }
13626 }
13627 }
13628
13629 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13630 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13631 UE = PromOps[i].getNode()->use_end();
13632 UI != UE; ++UI) {
13633 SDNode *User = *UI;
13634 if (User != N && !Visited.count(User))
13635 return SDValue();
13636
13637 // If we're going to promote the non-output-value operand(s) of SELECT or
13638 // SELECT_CC, record them for truncation.
13639 if (User->getOpcode() == ISD::SELECT) {
13640 if (User->getOperand(0) == PromOps[i])
13641 SelectTruncOp[0].insert(std::make_pair(User,
13642 User->getOperand(0).getValueType()));
13643 } else if (User->getOpcode() == ISD::SELECT_CC) {
13644 if (User->getOperand(0) == PromOps[i])
13645 SelectTruncOp[0].insert(std::make_pair(User,
13646 User->getOperand(0).getValueType()));
13647 if (User->getOperand(1) == PromOps[i])
13648 SelectTruncOp[1].insert(std::make_pair(User,
13649 User->getOperand(1).getValueType()));
13650 }
13651 }
13652 }
13653
13654 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13655 bool ReallyNeedsExt = false;
13656 if (N->getOpcode() != ISD::ANY_EXTEND) {
13657 // If all of the inputs are not already sign/zero extended, then
13658 // we'll still need to do that at the end.
13659 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13660 if (isa<ConstantSDNode>(Inputs[i]))
13661 continue;
13662
13663 unsigned OpBits =
13664 Inputs[i].getOperand(0).getValueSizeInBits();
13665 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13666
13667 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13668 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13669 APInt::getHighBitsSet(OpBits,
13670 OpBits-PromBits))) ||
13671 (N->getOpcode() == ISD::SIGN_EXTEND &&
13672 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13673 (OpBits-(PromBits-1)))) {
13674 ReallyNeedsExt = true;
13675 break;
13676 }
13677 }
13678 }
13679
13680 // Replace all inputs, either with the truncation operand, or a
13681 // truncation or extension to the final output type.
13682 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13683 // Constant inputs need to be replaced with the to-be-promoted nodes that
13684 // use them because they might have users outside of the cluster of
13685 // promoted nodes.
13686 if (isa<ConstantSDNode>(Inputs[i]))
13687 continue;
13688
13689 SDValue InSrc = Inputs[i].getOperand(0);
13690 if (Inputs[i].getValueType() == N->getValueType(0))
13691 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13692 else if (N->getOpcode() == ISD::SIGN_EXTEND)
13693 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13694 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13695 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13696 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13697 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13698 else
13699 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13700 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13701 }
13702
13703 std::list<HandleSDNode> PromOpHandles;
13704 for (auto &PromOp : PromOps)
13705 PromOpHandles.emplace_back(PromOp);
13706
13707 // Replace all operations (these are all the same, but have a different
13708 // (promoted) return type). DAG.getNode will validate that the types of
13709 // a binary operator match, so go through the list in reverse so that
13710 // we've likely promoted both operands first.
13711 while (!PromOpHandles.empty()) {
13712 SDValue PromOp = PromOpHandles.back().getValue();
13713 PromOpHandles.pop_back();
13714
13715 unsigned C;
13716 switch (PromOp.getOpcode()) {
13717 default: C = 0; break;
13718 case ISD::SELECT: C = 1; break;
13719 case ISD::SELECT_CC: C = 2; break;
13720 }
13721
13722 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13723 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13724 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13725 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13726 // The to-be-promoted operands of this node have not yet been
13727 // promoted (this should be rare because we're going through the
13728 // list backward, but if one of the operands has several users in
13729 // this cluster of to-be-promoted nodes, it is possible).
13730 PromOpHandles.emplace_front(PromOp);
13731 continue;
13732 }
13733
13734 // For SELECT and SELECT_CC nodes, we do a similar check for any
13735 // to-be-promoted comparison inputs.
13736 if (PromOp.getOpcode() == ISD::SELECT ||
13737 PromOp.getOpcode() == ISD::SELECT_CC) {
13738 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13739 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13740 (SelectTruncOp[1].count(PromOp.getNode()) &&
13741 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13742 PromOpHandles.emplace_front(PromOp);
13743 continue;
13744 }
13745 }
13746
13747 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13748 PromOp.getNode()->op_end());
13749
13750 // If this node has constant inputs, then they'll need to be promoted here.
13751 for (unsigned i = 0; i < 2; ++i) {
13752 if (!isa<ConstantSDNode>(Ops[C+i]))
13753 continue;
13754 if (Ops[C+i].getValueType() == N->getValueType(0))
13755 continue;
13756
13757 if (N->getOpcode() == ISD::SIGN_EXTEND)
13758 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13759 else if (N->getOpcode() == ISD::ZERO_EXTEND)
13760 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13761 else
13762 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13763 }
13764
13765 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13766 // truncate them again to the original value type.
13767 if (PromOp.getOpcode() == ISD::SELECT ||
13768 PromOp.getOpcode() == ISD::SELECT_CC) {
13769 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13770 if (SI0 != SelectTruncOp[0].end())
13771 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13772 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13773 if (SI1 != SelectTruncOp[1].end())
13774 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13775 }
13776
13777 DAG.ReplaceAllUsesOfValueWith(PromOp,
13778 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13779 }
13780
13781 // Now we're left with the initial extension itself.
13782 if (!ReallyNeedsExt)
13783 return N->getOperand(0);
13784
13785 // To zero extend, just mask off everything except for the first bit (in the
13786 // i1 case).
13787 if (N->getOpcode() == ISD::ZERO_EXTEND)
13788 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13789 DAG.getConstant(APInt::getLowBitsSet(
13790 N->getValueSizeInBits(0), PromBits),
13791 dl, N->getValueType(0)));
13792
13793 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13794 "Invalid extension type");
13795 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13796 SDValue ShiftCst =
13797 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13798 return DAG.getNode(
13799 ISD::SRA, dl, N->getValueType(0),
13800 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13801 ShiftCst);
13802}
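// A minimal illustration (hypothetical helpers, for exposition only) of the
// two finishing idioms used above: the SHL/SRA pair sign-extends the low
// PromBits bits, and the AND mask zero-extends them.
//
//   int64_t signExtendLow(uint64_t V, unsigned PromBits) {
//     unsigned Shift = 64 - PromBits;
//     return (int64_t)(V << Shift) >> Shift; // SHL, then arithmetic SRA
//   }
//   uint64_t zeroExtendLow(uint64_t V, unsigned PromBits) {
//     return V & (PromBits == 64 ? ~0ULL : (1ULL << PromBits) - 1);
//   }
//
// For the i1 case: signExtendLow(1, 1) == -1 and zeroExtendLow(1, 1) == 1,
// which is exactly what the SRA and AND forms above compute.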
13803
13804SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13805 DAGCombinerInfo &DCI) const {
13806 assert(N->getOpcode() == ISD::SETCC &&
13807 "Should be called with a SETCC node");
13808
13809 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13810 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13811 SDValue LHS = N->getOperand(0);
13812 SDValue RHS = N->getOperand(1);
13813
13814 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13815 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13816 LHS.hasOneUse())
13817 std::swap(LHS, RHS);
13818
13819 // x == 0-y --> x+y == 0
13820 // x != 0-y --> x+y != 0
13821 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13822 RHS.hasOneUse()) {
13823 SDLoc DL(N);
13824 SelectionDAG &DAG = DCI.DAG;
13825 EVT VT = N->getValueType(0);
13826 EVT OpVT = LHS.getValueType();
13827 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13828 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13829 }
13830 }
13831
13832 return DAGCombineTruncBoolExt(N, DCI);
13833}
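// A minimal sketch of the identity behind the rewrite above: in modular
// (two's-complement) arithmetic, x == 0 - y holds exactly when x + y == 0,
// so both formulations are interchangeable for SETEQ/SETNE:
//
//   bool viaSub(uint32_t X, uint32_t Y) { return X == 0u - Y; }
//   bool viaAdd(uint32_t X, uint32_t Y) { return X + Y == 0u; }
//   // viaSub(X, Y) == viaAdd(X, Y) for all X and Y.
//
// Comparing the ADD against zero avoids materializing the negated value.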
13834
13835// Is this an extending load from an f32 to an f64?
13836static bool isFPExtLoad(SDValue Op) {
13837 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13838 return LD->getExtensionType() == ISD::EXTLOAD &&
13839 Op.getValueType() == MVT::f64;
13840 return false;
13841}
13842
13843/// Reduces the number of fp-to-int conversions when building a vector.
13844///
13845/// If this vector is built out of floating to integer conversions,
13846/// transform it to a vector built out of floating point values followed by a
13847/// single floating to integer conversion of the vector.
13848/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13849/// becomes (fptosi (build_vector ($A, $B, ...)))
13850SDValue PPCTargetLowering::
13851combineElementTruncationToVectorTruncation(SDNode *N,
13852 DAGCombinerInfo &DCI) const {
13853 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13854 "Should be called with a BUILD_VECTOR node");
13855
13856 SelectionDAG &DAG = DCI.DAG;
13857 SDLoc dl(N);
13858
13859 SDValue FirstInput = N->getOperand(0);
13860 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13861 "The input operand must be an fp-to-int conversion.");
13862
13863 // This combine happens after legalization so the fp_to_[su]i nodes are
13864 // already converted to PPCISD nodes.
13865 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13866 if (FirstConversion == PPCISD::FCTIDZ ||
13867 FirstConversion == PPCISD::FCTIDUZ ||
13868 FirstConversion == PPCISD::FCTIWZ ||
13869 FirstConversion == PPCISD::FCTIWUZ) {
13870 bool IsSplat = true;
13871 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13872 FirstConversion == PPCISD::FCTIWUZ;
13873 EVT SrcVT = FirstInput.getOperand(0).getValueType();
13874 SmallVector<SDValue, 4> Ops;
13875 EVT TargetVT = N->getValueType(0);
13876 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13877 SDValue NextOp = N->getOperand(i);
13878 if (NextOp.getOpcode() != PPCISD::MFVSR)
13879 return SDValue();
13880 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13881 if (NextConversion != FirstConversion)
13882 return SDValue();
13883 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13884 // This is not valid if the input was originally double precision. It is
13885 // also not profitable to do unless this is an extending load in which
13886 // case doing this combine will allow us to combine consecutive loads.
13887 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13888 return SDValue();
13889 if (N->getOperand(i) != FirstInput)
13890 IsSplat = false;
13891 }
13892
13893 // If this is a splat, we leave it as-is since there will be only a single
13894 // fp-to-int conversion followed by a splat of the integer. This is better
13895 // for 32-bit and smaller ints and neutral for 64-bit ints.
13896 if (IsSplat)
13897 return SDValue();
13898
13899 // Now that we know we have the right type of node, get its operands
13900 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13901 SDValue In = N->getOperand(i).getOperand(0);
13902 if (Is32Bit) {
13903 // For 32-bit values, we need to add an FP_ROUND node (if we made it
13904 // here, we know that all inputs are extending loads so this is safe).
13905 if (In.isUndef())
13906 Ops.push_back(DAG.getUNDEF(SrcVT));
13907 else {
13908 SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13909 MVT::f32, In.getOperand(0),
13910 DAG.getIntPtrConstant(1, dl));
13911 Ops.push_back(Trunc);
13912 }
13913 } else
13914 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13915 }
13916
13917 unsigned Opcode;
13918 if (FirstConversion == PPCISD::FCTIDZ ||
13919 FirstConversion == PPCISD::FCTIWZ)
13920 Opcode = ISD::FP_TO_SINT;
13921 else
13922 Opcode = ISD::FP_TO_UINT;
13923
13924 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13925 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13926 return DAG.getNode(Opcode, dl, TargetVT, BV);
13927 }
13928 return SDValue();
13929}
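// A small numeric sketch of why the FP_ROUND step is restricted to extending
// loads: rounding a double to float before converting can change the result
// when the value did not originate as a float.
//
//   double D = 16777217.0;                // 2^24 + 1, not representable in f32
//   int32_t Direct  = (int32_t)D;         // 16777217
//   int32_t Rounded = (int32_t)(float)D;  // 16777216
//
// When the f64 input came from an extending f32 load, the FP_ROUND is exact,
// both paths agree, and the combine is safe.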
13930
13931/// Reduce the number of loads when building a vector.
13932///
13933/// Building a vector out of multiple loads can be converted to a load
13934/// of the vector type if the loads are consecutive. If the loads are
13935/// consecutive but in descending order, a shuffle is added at the end
13936/// to reorder the vector.
13937static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13938 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13939 "Should be called with a BUILD_VECTOR node");
13940
13941 SDLoc dl(N);
13942
13943 // Return early for non byte-sized types, as they can't be consecutive.
13944 if (!N->getValueType(0).getVectorElementType().isByteSized())
13945 return SDValue();
13946
13947 bool InputsAreConsecutiveLoads = true;
13948 bool InputsAreReverseConsecutive = true;
13949 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13950 SDValue FirstInput = N->getOperand(0);
13951 bool IsRoundOfExtLoad = false;
13952
13953 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13954 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13955 LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13956 IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13957 }
13958 // Not a build vector of (possibly fp_rounded) loads.
13959 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13960 N->getNumOperands() == 1)
13961 return SDValue();
13962
13963 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13964 // If any inputs are fp_round(extload), they all must be.
13965 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13966 return SDValue();
13967
13968 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13969 N->getOperand(i);
13970 if (NextInput.getOpcode() != ISD::LOAD)
13971 return SDValue();
13972
13973 SDValue PreviousInput =
13974 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13975 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13976 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13977
13978 // If any inputs are fp_round(extload), they all must be.
13979 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13980 return SDValue();
13981
13982 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13983 InputsAreConsecutiveLoads = false;
13984 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13985 InputsAreReverseConsecutive = false;
13986
13987 // Exit early if the loads are neither consecutive nor reverse consecutive.
13988 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13989 return SDValue();
13990 }
13991
13993 "The loads cannot be both consecutive and reverse consecutive.");
13994
13995 SDValue FirstLoadOp =
13996 IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13997 SDValue LastLoadOp =
13998 IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13999 N->getOperand(N->getNumOperands()-1);
14000
14001 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14002 LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14003 if (InputsAreConsecutiveLoads) {
14004 assert(LD1 && "Input needs to be a LoadSDNode.");
14005 return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14006 LD1->getBasePtr(), LD1->getPointerInfo(),
14007 LD1->getAlignment());
14008 }
14009 if (InputsAreReverseConsecutive) {
14010 assert(LDL && "Input needs to be a LoadSDNode.");
14011 SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
14012 LDL->getBasePtr(), LDL->getPointerInfo(),
14013 LDL->getAlignment());
14014 SmallVector<int, 16> Ops;
14015 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14016 Ops.push_back(i);
14017
14018 return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14019 DAG.getUNDEF(N->getValueType(0)), Ops);
14020 }
14021 return SDValue();
14022}
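// A sketch of the equivalence this combine exploits (hypothetical buffers,
// for exposition only): element loads from consecutive addresses are the
// same as one wide load.
//
//   float Buf[4] = {0.f, 1.f, 2.f, 3.f};
//   float V[4];
//   for (int i = 0; i < 4; ++i)
//     V[i] = Buf[i];                 // four scalar loads...
//   std::memcpy(V, Buf, sizeof(V));  // ...equal one 16-byte block load
//
// In the descending-order case the wide load is still used; only the
// reversing shuffle (mask <3,2,1,0> for four elements) is appended.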
14023
14024// This function adds the required vector_shuffle needed to get
14025// the elements of the vector extract in the correct position
14026// as specified by the CorrectElems encoding.
14027static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14028 SDValue Input, uint64_t Elems,
14029 uint64_t CorrectElems) {
14030 SDLoc dl(N);
14031
14032 unsigned NumElems = Input.getValueType().getVectorNumElements();
14033 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14034
14035 // Knowing the element indices being extracted from the original
14036 // vector and the order in which they're being inserted, just put
14037 // them at element indices required for the instruction.
14038 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14039 if (DAG.getDataLayout().isLittleEndian())
14040 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14041 else
14042 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14043 CorrectElems = CorrectElems >> 8;
14044 Elems = Elems >> 8;
14045 }
14046
14047 SDValue Shuffle =
14048 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14049 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14050
14051 EVT VT = N->getValueType(0);
14052 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14053
14054 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14055 Input.getValueType().getVectorElementType(),
14056 VT.getVectorNumElements());
14057 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14058 DAG.getValueType(ExtVT));
14059}
14060
14061// Look for build vector patterns where input operands come from sign
14062// extended vector_extract elements of specific indices. If the correct indices
14063// aren't used, add a vector shuffle to fix up the indices and create
14064// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14065// during instruction selection.
14066static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14067 // This array encodes the indices that the vector sign extend instructions
14068 // extract from when extending from one type to another for both BE and LE.
14069 // The right nibble of each byte corresponds to the LE indices,
14070 // and the left nibble of each byte corresponds to the BE indices.
14071 // For example: 0x3074B8FC byte->word
14072 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14073 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14074 // For example: 0x000070F8 byte->double word
14075 // For LE: the allowed indices are: 0x0,0x8
14076 // For BE: the allowed indices are: 0x7,0xF
14077 uint64_t TargetElems[] = {
14078 0x3074B8FC, // b->w
14079 0x000070F8, // b->d
14080 0x10325476, // h->w
14081 0x00003074, // h->d
14082 0x00001032, // w->d
14083 };
14084
14085 uint64_t Elems = 0;
14086 int Index;
14087 SDValue Input;
14088
14089 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14090 if (!Op)
14091 return false;
14092 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14093 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14094 return false;
14095
14096 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14097 // of the right width.
14098 SDValue Extract = Op.getOperand(0);
14099 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14100 Extract = Extract.getOperand(0);
14101 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14102 return false;
14103
14104 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14105 if (!ExtOp)
14106 return false;
14107
14108 Index = ExtOp->getZExtValue();
14109 if (Input && Input != Extract.getOperand(0))
14110 return false;
14111
14112 if (!Input)
14113 Input = Extract.getOperand(0);
14114
14115 Elems = Elems << 8;
14116 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14117 Elems |= Index;
14118
14119 return true;
14120 };
14121
14122 // If the build vector operands aren't sign extended vector extracts
14123 // of the same input vector, then return.
14124 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14125 if (!isSExtOfVecExtract(N->getOperand(i))) {
14126 return SDValue();
14127 }
14128 }
14129
14130 // If the vector extract indices are not correct, add the appropriate
14131 // vector_shuffle.
14132 int TgtElemArrayIdx;
14133 int InputSize = Input.getValueType().getScalarSizeInBits();
14134 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14135 if (InputSize + OutputSize == 40)
14136 TgtElemArrayIdx = 0;
14137 else if (InputSize + OutputSize == 72)
14138 TgtElemArrayIdx = 1;
14139 else if (InputSize + OutputSize == 48)
14140 TgtElemArrayIdx = 2;
14141 else if (InputSize + OutputSize == 80)
14142 TgtElemArrayIdx = 3;
14143 else if (InputSize + OutputSize == 96)
14144 TgtElemArrayIdx = 4;
14145 else
14146 return SDValue();
14147
14148 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14149 CorrectElems = DAG.getDataLayout().isLittleEndian()
14150 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14151 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14152 if (Elems != CorrectElems) {
14153 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14154 }
14155
14156 // Regular lowering will catch cases where a shuffle is not needed.
14157 return SDValue();
14158}
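// A sketch of decoding one TargetElems entry (hypothetical decoder, for
// exposition only). For 0x3074B8FC (byte->word), each byte carries a BE lane
// index in its high nibble and an LE lane index in its low nibble:
//
//   uint64_t Enc = 0x3074B8FC;
//   for (int Byte = 3; Byte >= 0; --Byte) {
//     unsigned LE = (Enc >> (8 * Byte)) & 0xF;     // 0x0, 0x4, 0x8, 0xC
//     unsigned BE = (Enc >> (8 * Byte + 4)) & 0xF; // 0x3, 0x7, 0xB, 0xF
//   }
//
// These are the lane positions the P9 vector extend instructions (e.g.
// vextsb2w) read, which is what the Elems accumulator is checked against.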
14159
14160SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14161 DAGCombinerInfo &DCI) const {
14162 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14163 "Should be called with a BUILD_VECTOR node");
14164
14165 SelectionDAG &DAG = DCI.DAG;
14166 SDLoc dl(N);
14167
14168 if (!Subtarget.hasVSX())
14169 return SDValue();
14170
14171 // The target independent DAG combiner will leave a build_vector of
14172 // float-to-int conversions intact. We can generate MUCH better code for
14173 // a float-to-int conversion of a vector of floats.
14174 SDValue FirstInput = N->getOperand(0);
14175 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14176 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14177 if (Reduced)
14178 return Reduced;
14179 }
14180
14181 // If we're building a vector out of consecutive loads, just load that
14182 // vector type.
14183 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14184 if (Reduced)
14185 return Reduced;
14186
14187 // If we're building a vector out of extended elements from another vector
14188 // we have P9 vector integer extend instructions. The code assumes legal
14189 // input types (i.e. it can't handle things like v4i16) so do not run before
14190 // legalization.
14191 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14192 Reduced = combineBVOfVecSExt(N, DAG);
14193 if (Reduced)
14194 return Reduced;
14195 }
14196
14197
14198 if (N->getValueType(0) != MVT::v2f64)
14199 return SDValue();
14200
14201 // Looking for:
14202 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14203 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14204 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14205 return SDValue();
14206 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14207 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14208 return SDValue();
14209 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14210 return SDValue();
14211
14212 SDValue Ext1 = FirstInput.getOperand(0);
14213 SDValue Ext2 = N->getOperand(1).getOperand(0);
14214 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14215 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14216 return SDValue();
14217
14218 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14219 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14220 if (!Ext1Op || !Ext2Op)
14221 return SDValue();
14222 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14223 Ext1.getOperand(0) != Ext2.getOperand(0))
14224 return SDValue();
14225
14226 int FirstElem = Ext1Op->getZExtValue();
14227 int SecondElem = Ext2Op->getZExtValue();
14228 int SubvecIdx;
14229 if (FirstElem == 0 && SecondElem == 1)
14230 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14231 else if (FirstElem == 2 && SecondElem == 3)
14232 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14233 else
14234 return SDValue();
14235
14236 SDValue SrcVec = Ext1.getOperand(0);
14237 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14238 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14239 return DAG.getNode(NodeType, dl, MVT::v2f64,
14240 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14241}
14242
14243SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14244 DAGCombinerInfo &DCI) const {
14245 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14246 N->getOpcode() == ISD::UINT_TO_FP) &&
14247 "Need an int -> FP conversion node here");
14248
14249 if (useSoftFloat() || !Subtarget.has64BitSupport())
14250 return SDValue();
14251
14252 SelectionDAG &DAG = DCI.DAG;
14253 SDLoc dl(N);
14254 SDValue Op(N, 0);
14255
14256 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14257 // from the hardware.
14258 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14259 return SDValue();
14260 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14261 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14262 return SDValue();
14263
14264 SDValue FirstOperand(Op.getOperand(0));
14265 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14266 (FirstOperand.getValueType() == MVT::i8 ||
14267 FirstOperand.getValueType() == MVT::i16);
14268 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14269 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14270 bool DstDouble = Op.getValueType() == MVT::f64;
14271 unsigned ConvOp = Signed ?
14272 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14273 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14274 SDValue WidthConst =
14275 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14276 dl, false);
14277 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14278 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14279 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14280 DAG.getVTList(MVT::f64, MVT::Other),
14281 Ops, MVT::i8, LDN->getMemOperand());
14282
14283 // For signed conversion, we need to sign-extend the value in the VSR
14284 if (Signed) {
14285 SDValue ExtOps[] = { Ld, WidthConst };
14286 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14287 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14288 } else
14289 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14290 }
14291
14292
14293 // For i32 intermediate values, unfortunately, the conversion functions
14294 // leave the upper 32 bits of the value undefined. Within the set of
14295 // scalar instructions, we have no method for zero- or sign-extending the
14296 // value. Thus, we cannot handle i32 intermediate values here.
14297 if (Op.getOperand(0).getValueType() == MVT::i32)
14298 return SDValue();
14299
14300 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14301 "UINT_TO_FP is supported only with FPCVT");
14302
14303 // If we have FCFIDS, then use it when converting to single-precision.
14304 // Otherwise, convert to double-precision and then round.
14305 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14306 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14307 : PPCISD::FCFIDS)
14308 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14309 : PPCISD::FCFID);
14310 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14311 ? MVT::f32
14312 : MVT::f64;
14313
14314 // If we're converting from a float, to an int, and back to a float again,
14315 // then we don't need the store/load pair at all.
14316 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14317 Subtarget.hasFPCVT()) ||
14318 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14319 SDValue Src = Op.getOperand(0).getOperand(0);
14320 if (Src.getValueType() == MVT::f32) {
14321 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14322 DCI.AddToWorklist(Src.getNode());
14323 } else if (Src.getValueType() != MVT::f64) {
14324 // Make sure that we don't pick up a ppc_fp128 source value.
14325 return SDValue();
14326 }
14327
14328 unsigned FCTOp =
14329 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14330 PPCISD::FCTIDUZ;
14331
14332 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14333 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14334
14335 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14336 FP = DAG.getNode(ISD::FP_ROUND, dl,
14337 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14338 DCI.AddToWorklist(FP.getNode());
14339 }
14340
14341 return FP;
14342 }
14343
14344 return SDValue();
14345}
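// The fast path above keeps an FP->int->FP round trip entirely in
// floating-point registers (FCTID[U]Z feeding FCFID[U][S]) rather than
// storing the integer and reloading it. The value computed is simply:
//
//   double roundTrip(double D) {
//     return (double)(int64_t)D; // truncate toward zero, then reconvert
//   }
//   // roundTrip(2.75) == 2.0, roundTrip(-2.75) == -2.0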
14346
14347// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14348// builtins) into loads with swaps.
14349SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14350 DAGCombinerInfo &DCI) const {
14351 SelectionDAG &DAG = DCI.DAG;
14352 SDLoc dl(N);
14353 SDValue Chain;
14354 SDValue Base;
14355 MachineMemOperand *MMO;
14356
14357 switch (N->getOpcode()) {
14358 default:
14359 llvm_unreachable("Unexpected opcode for little endian VSX load");
14360 case ISD::LOAD: {
14361 LoadSDNode *LD = cast<LoadSDNode>(N);
14362 Chain = LD->getChain();
14363 Base = LD->getBasePtr();
14364 MMO = LD->getMemOperand();
14365 // If the MMO suggests this isn't a load of a full vector, leave
14366 // things alone. For a built-in, we have to make the change for
14367 // correctness, so if there is a size problem that will be a bug.
14368 if (MMO->getSize() < 16)
14369 return SDValue();
14370 break;
14371 }
14372 case ISD::INTRINSIC_W_CHAIN: {
14373 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14374 Chain = Intrin->getChain();
14375 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14376 // us what we want. Get operand 2 instead.
14377 Base = Intrin->getOperand(2);
14378 MMO = Intrin->getMemOperand();
14379 break;
14380 }
14381 }
14382
14383 MVT VecTy = N->getValueType(0).getSimpleVT();
14384
14385 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14386 // aligned and the type is a vector with elements up to 4 bytes
14387 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14388 VecTy.getScalarSizeInBits() <= 32) {
14389 return SDValue();
14390 }
14391
14392 SDValue LoadOps[] = { Chain, Base };
14393 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14394 DAG.getVTList(MVT::v2f64, MVT::Other),
14395 LoadOps, MVT::v2f64, MMO);
14396
14397 DCI.AddToWorklist(Load.getNode());
14398 Chain = Load.getValue(1);
14399 SDValue Swap = DAG.getNode(
14400 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14401 DCI.AddToWorklist(Swap.getNode());
14402
14403 // Add a bitcast if the resulting load type doesn't match v2f64.
14404 if (VecTy != MVT::v2f64) {
14405 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14406 DCI.AddToWorklist(N.getNode());
14407 // Package {bitcast value, swap's chain} to match Load's shape.
14408 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14409 N, Swap.getValue(1));
14410 }
14411
14412 return Swap;
14413}
14414
14415// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14416// builtins) into stores with swaps.
14417SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14418 DAGCombinerInfo &DCI) const {
14419 SelectionDAG &DAG = DCI.DAG;
14420 SDLoc dl(N);
14421 SDValue Chain;
14422 SDValue Base;
14423 unsigned SrcOpnd;
14424 MachineMemOperand *MMO;
14425
14426 switch (N->getOpcode()) {
14427 default:
14428 llvm_unreachable("Unexpected opcode for little endian VSX store");
14429 case ISD::STORE: {
14430 StoreSDNode *ST = cast<StoreSDNode>(N);
14431 Chain = ST->getChain();
14432 Base = ST->getBasePtr();
14433 MMO = ST->getMemOperand();
14434 SrcOpnd = 1;
14435 // If the MMO suggests this isn't a store of a full vector, leave
14436 // things alone. For a built-in, we have to make the change for
14437 // correctness, so if there is a size problem that will be a bug.
14438 if (MMO->getSize() < 16)
14439 return SDValue();
14440 break;
14441 }
14442 case ISD::INTRINSIC_VOID: {
14443 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14444 Chain = Intrin->getChain();
14445 // Intrin->getBasePtr() oddly does not get what we want.
14446 Base = Intrin->getOperand(3);
14447 MMO = Intrin->getMemOperand();
14448 SrcOpnd = 2;
14449 break;
14450 }
14451 }
14452
14453 SDValue Src = N->getOperand(SrcOpnd);
14454 MVT VecTy = Src.getValueType().getSimpleVT();
14455
14456 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
14457 // aligned and the type is a vector with elements up to 4 bytes.
14458 if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14459 VecTy.getScalarSizeInBits() <= 32) {
14460 return SDValue();
14461 }
14462
14463 // All stores are done as v2f64 and possible bit cast.
14464 if (VecTy != MVT::v2f64) {
14465 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14466 DCI.AddToWorklist(Src.getNode());
14467 }
14468
14469 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14470 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14471 DCI.AddToWorklist(Swap.getNode());
14472 Chain = Swap.getValue(1);
14473 SDValue StoreOps[] = { Chain, Swap, Base };
14474 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14475 DAG.getVTList(MVT::Other),
14476 StoreOps, VecTy, MMO);
14477 DCI.AddToWorklist(Store.getNode());
14478 return Store;
14479}
14480
14481// Handle DAG combine for STORE (FP_TO_INT F).
14482SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14483 DAGCombinerInfo &DCI) const {
14484
14485 SelectionDAG &DAG = DCI.DAG;
14486 SDLoc dl(N);
14487 unsigned Opcode = N->getOperand(1).getOpcode();
14488
14489 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14490 && "Not a FP_TO_INT Instruction!");
14491
14492 SDValue Val = N->getOperand(1).getOperand(0);
14493 EVT Op1VT = N->getOperand(1).getValueType();
14494 EVT ResVT = Val.getValueType();
14495
14496 // Floating point types smaller than 32 bits are not legal on Power.
14497 if (ResVT.getScalarSizeInBits() < 32)
14498 return SDValue();
14499
14500 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14501 bool ValidTypeForStoreFltAsInt =
14502 (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14503 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14504
14505 if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14506 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14507 return SDValue();
14508
14509 // Extend f32 values to f64
14510 if (ResVT.getScalarSizeInBits() == 32) {
14511 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14512 DCI.AddToWorklist(Val.getNode());
14513 }
14514
14515 // Set signed or unsigned conversion opcode.
14516 unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14517 PPCISD::FP_TO_SINT_IN_VSR :
14518 PPCISD::FP_TO_UINT_IN_VSR;
14519
14520 Val = DAG.getNode(ConvOpcode,
14521 dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14522 DCI.AddToWorklist(Val.getNode());
14523
14524 // Set number of bytes being converted.
14525 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14526 SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14527 DAG.getIntPtrConstant(ByteSize, dl, false),
14528 DAG.getValueType(Op1VT) };
14529
14530 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14531 DAG.getVTList(MVT::Other), Ops,
14532 cast<StoreSDNode>(N)->getMemoryVT(),
14533 cast<StoreSDNode>(N)->getMemOperand());
14534
14535 DCI.AddToWorklist(Val.getNode());
14536 return Val;
14537}
14538
14539static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14540 // Check that the source of the element keeps flipping
14541 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14542 bool PrevElemFromFirstVec = Mask[0] < NumElts;
14543 for (int i = 1, e = Mask.size(); i < e; i++) {
14544 if (PrevElemFromFirstVec && Mask[i] < NumElts)
14545 return false;
14546 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14547 return false;
14548 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14549 }
14550 return true;
14551}
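// The predicate accepts exactly the masks whose elements alternate between
// the two source vectors. For v4i32 (NumElts == 4), for example:
//
//   int Good[4] = {0, 5, 1, 6}; // first, second, first, second -> true
//   int Bad[4]  = {0, 1, 5, 6}; // two in a row from one source -> false
//
// Alternating masks are the ones that a single merge-style instruction can
// implement once the splat operand is adjusted below.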
14552
14553static bool isSplatBV(SDValue Op) {
14554 if (Op.getOpcode() != ISD::BUILD_VECTOR)
14555 return false;
14556 SDValue FirstOp;
14557
14558 // Find first non-undef input.
14559 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14560 FirstOp = Op.getOperand(i);
14561 if (!FirstOp.isUndef())
14562 break;
14563 }
14564
14565 // All inputs are undef or the same as the first non-undef input.
14566 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14567 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14568 return false;
14569 return true;
14570}
14571
14572static SDValue isScalarToVec(SDValue Op) {
14573 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14574 return Op;
14575 if (Op.getOpcode() != ISD::BITCAST)
14576 return SDValue();
14577 Op = Op.getOperand(0);
14578 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14579 return Op;
14580 return SDValue();
14581}
14582
14583static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14584 int LHSMaxIdx, int RHSMinIdx,
14585 int RHSMaxIdx, int HalfVec) {
14586 for (int i = 0, e = ShuffV.size(); i < e; i++) {
14587 int Idx = ShuffV[i];
14588 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14589 ShuffV[i] += HalfVec;
14590 }
14591 return;
14592}
14593
14594// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14595// the original is:
14596// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14597// In such a case, just change the shuffle mask to extract the element
14598// from the permuted index.
14599static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14600 SDLoc dl(OrigSToV);
14601 EVT VT = OrigSToV.getValueType();
14602 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14603 "Expecting a SCALAR_TO_VECTOR here");
14604 SDValue Input = OrigSToV.getOperand(0);
14605
14606 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14607 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14608 SDValue OrigVector = Input.getOperand(0);
14609
14610 // Can't handle non-const element indices or different vector types
14611 // for the input to the extract and the output of the scalar_to_vector.
14612 if (Idx && VT == OrigVector.getValueType()) {
14613 SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14614 NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14615 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14616 }
14617 }
14618 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14619 OrigSToV.getOperand(0));
14620}
14621
14622// On little endian subtargets, combine shuffles such as:
14623// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14624// into:
14625// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14626// because the latter can be matched to a single instruction merge.
14627// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14628// to put the value into element zero. Adjust the shuffle mask so that the
14629// vector can remain in permuted form (to prevent a swap prior to a shuffle).
14630SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14631 SelectionDAG &DAG) const {
14632 SDValue LHS = SVN->getOperand(0);
14633 SDValue RHS = SVN->getOperand(1);
14634 auto Mask = SVN->getMask();
14635 int NumElts = LHS.getValueType().getVectorNumElements();
14636 SDValue Res(SVN, 0);
14637 SDLoc dl(SVN);
14638
14639 // None of these combines are useful on big endian systems since the ISA
14640 // already has a big endian bias.
14641 if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14642 return Res;
14643
14644 // If this is not a shuffle of a shuffle and the first element comes from
14645 // the second vector, canonicalize to the commuted form. This will make it
14646 // more likely to match one of the single instruction patterns.
14647 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14648 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14649 std::swap(LHS, RHS);
14650 Res = DAG.getCommutedVectorShuffle(*SVN);
14651 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14652 }
14653
14654 // Adjust the shuffle mask if either input vector comes from a
14655 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14656 // form (to prevent the need for a swap).
14657 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14658 SDValue SToVLHS = isScalarToVec(LHS);
14659 SDValue SToVRHS = isScalarToVec(RHS);
14660 if (SToVLHS || SToVRHS) {
14661 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14662 : SToVRHS.getValueType().getVectorNumElements();
14663 int NumEltsOut = ShuffV.size();
14664
14665 // Initially assume that neither input is permuted. These will be adjusted
14666 // accordingly if either input is.
14667 int LHSMaxIdx = -1;
14668 int RHSMinIdx = -1;
14669 int RHSMaxIdx = -1;
14670 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14671
14672 // Get the permuted scalar to vector nodes for the source(s) that come from
14673 // ISD::SCALAR_TO_VECTOR.
14674 if (SToVLHS) {
14675 // Set up the values for the shuffle vector fixup.
14676 LHSMaxIdx = NumEltsOut / NumEltsIn;
14677 SToVLHS = getSToVPermuted(SToVLHS, DAG);
14678 if (SToVLHS.getValueType() != LHS.getValueType())
14679 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14680 LHS = SToVLHS;
14681 }
14682 if (SToVRHS) {
14683 RHSMinIdx = NumEltsOut;
14684 RHSMaxIdx = RHSMinIdx + NumEltsOut / NumEltsIn;
14685 SToVRHS = getSToVPermuted(SToVRHS, DAG);
14686 if (SToVRHS.getValueType() != RHS.getValueType())
14687 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14688 RHS = SToVRHS;
14689 }
14690
14691 // Fix up the shuffle mask to reflect where the desired element actually is.
14692 // The minimum and maximum indices that correspond to element zero for both
14693 // the LHS and RHS are computed and will control which shuffle mask entries
14694 // are to be changed. For example, if the RHS is permuted, any shuffle mask
14695 // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14696 // HalfVec to refer to the corresponding element in the permuted vector.
14697 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14698 HalfVec);
14699 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14700
14701 // We may have simplified away the shuffle. We won't be able to do anything
14702 // further with it here.
14703 if (!isa<ShuffleVectorSDNode>(Res))
14704 return Res;
14705 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14706 }
14707
14708 // The common case after we commuted the shuffle is that the RHS is a splat
14709 // and we have elements coming in from the splat at indices that are not
14710 // conducive to using a merge.
14711 // Example:
14712 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14713 if (!isSplatBV(RHS))
14714 return Res;
14715
14716 // We are looking for a mask such that all even elements are from
14717 // one vector and all odd elements from the other.
14718 if (!isAlternatingShuffMask(Mask, NumElts))
14719 return Res;
14720
14721 // Adjust the mask so we are pulling in the same index from the splat
14722 // as the index from the interesting vector in consecutive elements.
14723 // Example (even elements from first vector):
14724 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14725 if (Mask[0] < NumElts)
14726 for (int i = 1, e = Mask.size(); i < e; i += 2)
14727 ShuffV[i] = (ShuffV[i - 1] + NumElts);
14728 // Example (odd elements from first vector):
14729 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14730 else
14731 for (int i = 0, e = Mask.size(); i < e; i += 2)
14732 ShuffV[i] = (ShuffV[i + 1] + NumElts);
14733
14734 // If the RHS has undefs, we need to remove them since we may have created
14735 // a shuffle that adds those instead of the splat value.
14736 SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14737 RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14738
14739 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14740 return Res;
14741}
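// A sketch of the final mask adjustment for the even-elements case with
// v16i8 inputs (NumElts == 16):
//
//   int ShuffV[16] = {0, 17, 1, 19, 2, 21, 3, 23,
//                     4, 25, 5, 27, 6, 29, 7, 31}; // before
//   for (int i = 1; i < 16; i += 2)
//     ShuffV[i] = ShuffV[i - 1] + 16;              // after: <0,16,1,17,...>
//
// Because the RHS is a splat, every RHS lane holds the same value, so
// redirecting the odd entries preserves semantics and yields a mask that
// matches a single merge instruction.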
14742
14743SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14744 LSBaseSDNode *LSBase,
14745 DAGCombinerInfo &DCI) const {
14746 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14747 "Not a reverse memop pattern!");
14748
14749 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14750 auto Mask = SVN->getMask();
14751 int i = 0;
14752 auto I = Mask.rbegin();
14753 auto E = Mask.rend();
14754
14755 for (; I != E; ++I) {
14756 if (*I != i)
14757 return false;
14758 i++;
14759 }
14760 return true;
14761 };
14762
14763 SelectionDAG &DAG = DCI.DAG;
14764 EVT VT = SVN->getValueType(0);
14765
14766 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14767 return SDValue();
14768
14769 // Before P9, the PPCVSXSwapRemoval pass fixes up the element order for us.
14770 // See the comment in PPCVSXSwapRemoval.cpp.
14771 // This combine conflicts with that pass, so we don't do it on pre-P9.
14772 if (!Subtarget.hasP9Vector())
14773 return SDValue();
14774
14775 if (!IsElementReverse(SVN))
14776 return SDValue();
14777
14778 if (LSBase->getOpcode() == ISD::LOAD) {
14779 SDLoc dl(SVN);
14780 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14781 return DAG.getMemIntrinsicNode(
14782 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14783 LSBase->getMemoryVT(), LSBase->getMemOperand());
14784 }
14785
14786 if (LSBase->getOpcode() == ISD::STORE) {
14787 SDLoc dl(LSBase);
14788 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14789 LSBase->getBasePtr()};
14790 return DAG.getMemIntrinsicNode(
14791 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14792 LSBase->getMemoryVT(), LSBase->getMemOperand());
14793 }
14794
14795 llvm_unreachable("Expected a load or store node here");
14796}
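// IsElementReverse accepts only a full element reversal; for four elements
// the mask must be exactly <3,2,1,0>:
//
//   int Mask[4] = {3, 2, 1, 0};
//   // Walking the mask from rbegin() to rend() must yield 0, 1, 2, 3,
//   // which is precisely what the loop above verifies.
//
// Only that shuffle can be absorbed into a single element-reversed memop
// (PPCISD::LOAD_VEC_BE / PPCISD::STORE_VEC_BE).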
14797
14798SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14799 DAGCombinerInfo &DCI) const {
14800 SelectionDAG &DAG = DCI.DAG;
14801 SDLoc dl(N);
14802 switch (N->getOpcode()) {
14803 default: break;
14804 case ISD::ADD:
14805 return combineADD(N, DCI);
14806 case ISD::SHL:
14807 return combineSHL(N, DCI);
14808 case ISD::SRA:
14809 return combineSRA(N, DCI);
14810 case ISD::SRL:
14811 return combineSRL(N, DCI);
14812 case ISD::MUL:
14813 return combineMUL(N, DCI);
14814 case ISD::FMA:
14815 case PPCISD::FNMSUB:
14816 return combineFMALike(N, DCI);
14817 case PPCISD::SHL:
14818 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14819 return N->getOperand(0);
14820 break;
14821 case PPCISD::SRL:
14822 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14823 return N->getOperand(0);
14824 break;
14825 case PPCISD::SRA:
14826 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14827 if (C->isNullValue() || // 0 >>s V -> 0.
14828 C->isAllOnesValue()) // -1 >>s V -> -1.
14829 return N->getOperand(0);
14830 }
14831 break;
14832 case ISD::SIGN_EXTEND:
14833 case ISD::ZERO_EXTEND:
14834 case ISD::ANY_EXTEND:
14835 return DAGCombineExtBoolTrunc(N, DCI);
14836 case ISD::TRUNCATE:
14837 return combineTRUNCATE(N, DCI);
14838 case ISD::SETCC:
14839 if (SDValue CSCC = combineSetCC(N, DCI))
14840 return CSCC;
14841 LLVM_FALLTHROUGH;
14842 case ISD::SELECT_CC:
14843 return DAGCombineTruncBoolExt(N, DCI);
14844 case ISD::SINT_TO_FP:
14845 case ISD::UINT_TO_FP:
14846 return combineFPToIntToFP(N, DCI);
14847 case ISD::VECTOR_SHUFFLE:
14848 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14849 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14850 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14851 }
14852 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14853 case ISD::STORE: {
14854
14855 EVT Op1VT = N->getOperand(1).getValueType();
14856 unsigned Opcode = N->getOperand(1).getOpcode();
14857
14858 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14859 SDValue Val = combineStoreFPToInt(N, DCI);
14860 if (Val)
14861 return Val;
14862 }
14863
14864 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14865 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14866 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14867 if (Val)
14868 return Val;
14869 }
14870
14871 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14872 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14873 N->getOperand(1).getNode()->hasOneUse() &&
14874 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14875 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14876
14877 // STBRX can only handle simple types and it makes no sense to store less
14878 // than two bytes in byte-reversed order.
14879 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14880 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14881 break;
14882
14883 SDValue BSwapOp = N->getOperand(1).getOperand(0);
14884 // Do an any-extend to 32-bits if this is a half-word input.
14885 if (BSwapOp.getValueType() == MVT::i16)
14886 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14887
14888 // If the type of the BSWAP operand is wider than the stored memory width,
14889 // it needs to be shifted to the right side before STBRX.
14890 if (Op1VT.bitsGT(mVT)) {
14891 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14892 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14893 DAG.getConstant(Shift, dl, MVT::i32));
14894 // Need to truncate if this is a bswap of i64 stored as i32/i16.
14895 if (Op1VT == MVT::i64)
14896 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14897 }
14898
14899 SDValue Ops[] = {
14900 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14901 };
14902 return
14903 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14904 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14905 cast<StoreSDNode>(N)->getMemOperand());
14906 }
14907
14908 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14909 // So it can increase the chance of CSE constant construction.
14910 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14911 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14912 // Need to sign-extend to 64 bits to handle negative values.
14913 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14914 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14915 MemVT.getSizeInBits());
14916 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14917
14918 // DAG.getTruncStore() can't be used here because it doesn't accept
14919 // the general (base + offset) addressing mode.
14920 // So we use UpdateNodeOperands and setTruncatingStore instead.
14921 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14922 N->getOperand(3));
14923 cast<StoreSDNode>(N)->setTruncatingStore(true);
14924 return SDValue(N, 0);
14925 }
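// A small illustration of the sign-extension step used above:
//
//   uint64_t Val64 = SignExtend64(0xFFFFFFFFu, 32); // 0xFFFFFFFFFFFFFFFF
//   // The truncating i32 store writes back the original 0xFFFFFFFF.
//
// Sign-extending (rather than zero-extending) keeps negative i32 values
// bit-identical after truncation, so one i64 constant can be shared by
// i32 and i64 stores.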
14926
14927 // For little endian, VSX stores require generating xxswapd/lxvd2x.
14928 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14929 if (Op1VT.isSimple()) {
14930 MVT StoreVT = Op1VT.getSimpleVT();
14931 if (Subtarget.needsSwapsForVSXMemOps() &&
14932 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14933 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14934 return expandVSXStoreForLE(N, DCI);
14935 }
14936 break;
14937 }
14938 case ISD::LOAD: {
14939 LoadSDNode *LD = cast<LoadSDNode>(N);
14940 EVT VT = LD->getValueType(0);
14941
14942 // For little endian, VSX loads require generating lxvd2x/xxswapd.
14943 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14944 if (VT.isSimple()) {
14945 MVT LoadVT = VT.getSimpleVT();
14946 if (Subtarget.needsSwapsForVSXMemOps() &&
14947 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14948 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14949 return expandVSXLoadForLE(N, DCI);
14950 }
14951
14952 // We sometimes end up with a 64-bit integer load, from which we extract
14953 // two single-precision floating-point numbers. This happens with
14954 // std::complex<float>, and other similar structures, because of the way we
14955 // canonicalize structure copies. However, if we lack direct moves,
14956 // then the final bitcasts from the extracted integer values to the
14957 // floating-point numbers turn into store/load pairs. Even with direct moves,
14958 // just loading the two floating-point numbers is likely better.
14959 auto ReplaceTwoFloatLoad = [&]() {
14960 if (VT != MVT::i64)
14961 return false;
14962
14963 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14964 LD->isVolatile())
14965 return false;
14966
14967 // We're looking for a sequence like this:
14968 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14969 // t16: i64 = srl t13, Constant:i32<32>
14970 // t17: i32 = truncate t16
14971 // t18: f32 = bitcast t17
14972 // t19: i32 = truncate t13
14973 // t20: f32 = bitcast t19
14974
14975 if (!LD->hasNUsesOfValue(2, 0))
14976 return false;
14977
14978 auto UI = LD->use_begin();
14979 while (UI.getUse().getResNo() != 0) ++UI;
14980 SDNode *Trunc = *UI++;
14981 while (UI.getUse().getResNo() != 0) ++UI;
14982 SDNode *RightShift = *UI;
14983 if (Trunc->getOpcode() != ISD::TRUNCATE)
14984 std::swap(Trunc, RightShift);
14985
14986 if (Trunc->getOpcode() != ISD::TRUNCATE ||
14987 Trunc->getValueType(0) != MVT::i32 ||
14988 !Trunc->hasOneUse())
14989 return false;
14990 if (RightShift->getOpcode() != ISD::SRL ||
14991 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14992 RightShift->getConstantOperandVal(1) != 32 ||
14993 !RightShift->hasOneUse())
14994 return false;
14995
14996 SDNode *Trunc2 = *RightShift->use_begin();
14997 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14998 Trunc2->getValueType(0) != MVT::i32 ||
14999 !Trunc2->hasOneUse())
15000 return false;
15001
15002 SDNode *Bitcast = *Trunc->use_begin();
15003 SDNode *Bitcast2 = *Trunc2->use_begin();
15004
15005 if (Bitcast->getOpcode() != ISD::BITCAST ||
15006 Bitcast->getValueType(0) != MVT::f32)
15007 return false;
15008 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15009 Bitcast2->getValueType(0) != MVT::f32)
15010 return false;
15011
15012 if (Subtarget.isLittleEndian())
15013 std::swap(Bitcast, Bitcast2);
15014
15015 // Bitcast has the second float (in memory-layout order) and Bitcast2
15016 // has the first one.
15017
15018 SDValue BasePtr = LD->getBasePtr();
15019 if (LD->isIndexed()) {
15020 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15021 "Non-pre-inc AM on PPC?");
15022 BasePtr =
15023 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15024 LD->getOffset());
15025 }
15026
15027 auto MMOFlags =
15028 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15029 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15030 LD->getPointerInfo(), LD->getAlignment(),
15031 MMOFlags, LD->getAAInfo());
15032 SDValue AddPtr =
15033 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15034 BasePtr, DAG.getIntPtrConstant(4, dl));
15035 SDValue FloatLoad2 = DAG.getLoad(
15036 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15037 LD->getPointerInfo().getWithOffset(4),
15038 MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
15039
15040 if (LD->isIndexed()) {
15041 // Note that DAGCombine should re-form any pre-increment load(s) from
15042 // what is produced here if that makes sense.
15043 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15044 }
15045
15046 DCI.CombineTo(Bitcast2, FloatLoad);
15047 DCI.CombineTo(Bitcast, FloatLoad2);
15048
15049 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15050 SDValue(FloatLoad2.getNode(), 1));
15051 return true;
15052 };
15053
15054 if (ReplaceTwoFloatLoad())
15055 return SDValue(N, 0);
15056
15057 EVT MemVT = LD->getMemoryVT();
15058 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15059 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15060 Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
15061 Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
15062 if (LD->isUnindexed() && VT.isVector() &&
15063 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15064 // P8 and later hardware should just use LOAD.
15065 !Subtarget.hasP8Vector() &&
15066 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15067 VT == MVT::v4f32)) ||
15068 (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
15069 LD->getAlign() >= ScalarABIAlignment)) &&
15070 LD->getAlign() < ABIAlignment) {
15071 // This is a type-legal unaligned Altivec or QPX load.
15072 SDValue Chain = LD->getChain();
15073 SDValue Ptr = LD->getBasePtr();
15074 bool isLittleEndian = Subtarget.isLittleEndian();
15075
15076 // This implements the loading of unaligned vectors as described in
15077 // the venerable Apple Velocity Engine overview. Specifically:
15078 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15079 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15080 //
15081 // The general idea is to expand a sequence of one or more unaligned
15082 // loads into an alignment-based permutation-control instruction (lvsl
15083 // or lvsr), a series of regular vector loads (which always truncate
15084 // their input address to an aligned address), and a series of
15085 // permutations. The results of these permutations are the requested
15086 // loaded values. The trick is that the last "extra" load is not taken
15087 // from the address you might suspect (sizeof(vector) bytes after the
15088 // last requested load), but rather sizeof(vector) - 1 bytes after the
15089 // last requested vector. The point of this is to avoid a page fault if
15090 // the base address happened to be aligned. This works because if the
15091 // base address is aligned, then adding less than a full vector length
15092 // will cause the last vector in the sequence to be (re)loaded.
15093 // Otherwise, the next vector will be fetched as you might suspect was
15094 // necessary.
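// A worked example of the trick: for a 16-byte vector at address 0x1008,
// lvx loads the aligned blocks at 0x1000 and 0x1010, and vperm (driven by
// lvsl/lvsr of the address) splices together bytes 8..23. The "extra" load
// is taken at 0x1008 + 15 = 0x1017, which truncates to 0x1010; had the base
// been aligned (0x1000), 0x100F would truncate back to 0x1000, re-reading
// the same block instead of touching a possibly unmapped next page.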
15095
15096 // We might be able to reuse the permutation generation from
15097 // a different base address offset from this one by an aligned amount.
15098 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15099 // optimization later.
15100 Intrinsic::ID Intr, IntrLD, IntrPerm;
15101 MVT PermCntlTy, PermTy, LDTy;
15102 if (Subtarget.hasAltivec()) {
15103 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
15104 Intrinsic::ppc_altivec_lvsl;
15105 IntrLD = Intrinsic::ppc_altivec_lvx;
15106 IntrPerm = Intrinsic::ppc_altivec_vperm;
15107 PermCntlTy = MVT::v16i8;
15108 PermTy = MVT::v4i32;
15109 LDTy = MVT::v4i32;
15110 } else {
15111 Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
15112 Intrinsic::ppc_qpx_qvlpcls;
15113 IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
15114 Intrinsic::ppc_qpx_qvlfs;
15115 IntrPerm = Intrinsic::ppc_qpx_qvfperm;
15116 PermCntlTy = MVT::v4f64;
15117 PermTy = MVT::v4f64;
15118 LDTy = MemVT.getSimpleVT();
15119 }
15120
15121 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15122
15123 // Create the new MMO for the new base load. It is like the original MMO,
15124 // but represents an area in memory almost twice the vector size centered
15125 // on the original address. If the address is unaligned, we might start
15126 // reading up to (sizeof(vector)-1) bytes below the address of the
15127 // original unaligned load.
15128 MachineFunction &MF = DAG.getMachineFunction();
15129 MachineMemOperand *BaseMMO =
15130 MF.getMachineMemOperand(LD->getMemOperand(),
15131 -(long)MemVT.getStoreSize()+1,
15132 2*MemVT.getStoreSize()-1);
15133
15134 // Create the new base load.
15135 SDValue LDXIntID =
15136 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15137 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15138 SDValue BaseLoad =
15139 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15140 DAG.getVTList(PermTy, MVT::Other),
15141 BaseLoadOps, LDTy, BaseMMO);
15142
15143 // Note that the value of IncOffset (which is provided to the next
15144 // load's pointer info offset value, and thus used to calculate the
15145 // alignment), and the value of IncValue (which is actually used to
15146 // increment the pointer value) are different! This is because we
15147 // require the next load to appear to be aligned, even though it
15148 // is actually offset from the base pointer by a lesser amount.
15149 int IncOffset = VT.getSizeInBits() / 8;
15150 int IncValue = IncOffset;
15151
15152 // Walk (both up and down) the chain looking for another load at the real
15153 // (aligned) offset (the alignment of the other load does not matter in
15154 // this case). If found, then do not use the offset reduction trick, as
15155 // that will prevent the loads from being later combined (as they would
15156 // otherwise be duplicates).
15157 if (!findConsecutiveLoad(LD, DAG))
15158 --IncValue;
15159
15160 SDValue Increment =
15161 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15162 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15163
15164 MachineMemOperand *ExtraMMO =
15165 MF.getMachineMemOperand(LD->getMemOperand(),
15166 1, 2*MemVT.getStoreSize()-1);
15167 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15168 SDValue ExtraLoad =
15169 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15170 DAG.getVTList(PermTy, MVT::Other),
15171 ExtraLoadOps, LDTy, ExtraMMO);
15172
15173 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15174 BaseLoad.getValue(1), ExtraLoad.getValue(1));
15175
15176 // Because vperm has a big-endian bias, we must reverse the order
15177 // of the input vectors and complement the permute control vector
15178 // when generating little endian code. We have already handled the
15179 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15180 // and ExtraLoad here.
15181 SDValue Perm;
15182 if (isLittleEndian)
15183 Perm = BuildIntrinsicOp(IntrPerm,
15184 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15185 else
15186 Perm = BuildIntrinsicOp(IntrPerm,
15187 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15188
15189 if (VT != PermTy)
15190 Perm = Subtarget.hasAltivec() ?
15191 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
15192 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
15193 DAG.getTargetConstant(1, dl, MVT::i64));
15194 // second argument is 1 because this rounding
15195 // is always exact.
15196
15197 // The output of the permutation is our loaded result, the TokenFactor is
15198 // our new chain.
15199 DCI.CombineTo(N, Perm, TF);
15200 return SDValue(N, 0);
15201 }
15202 }
15203 break;
15204 case ISD::INTRINSIC_WO_CHAIN: {
15205 bool isLittleEndian = Subtarget.isLittleEndian();
15206 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15207 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15208 : Intrinsic::ppc_altivec_lvsl);
15209 if ((IID == Intr ||
15210 IID == Intrinsic::ppc_qpx_qvlpcld ||
15211 IID == Intrinsic::ppc_qpx_qvlpcls) &&
15212 N->getOperand(1)->getOpcode() == ISD::ADD) {
15213 SDValue Add = N->getOperand(1);
15214
15215 int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
15216 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
15217
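// Sketch of the idea: lvsl(base) and lvsl(base + 32) yield the same
// permute control vector, because only the low 4 bits of the address
// matter in the 16-byte case (the low 5 bits in the 32-byte QPX case),
// so an existing result can simply be reused.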
15218 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15219 APInt::getAllOnesValue(Bits /* alignment */)
15220 .zext(Add.getScalarValueSizeInBits()))) {
15221 SDNode *BasePtr = Add->getOperand(0).getNode();
15222 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15223 UE = BasePtr->use_end();
15224 UI != UE; ++UI) {
15225 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15226 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
15227 // We've found another LVSL/LVSR, and this address is an aligned
15228 // multiple of that one. The results will be the same, so use the
15229 // one we've just found instead.
15230
15231 return SDValue(*UI, 0);
15232 }
15233 }
15234 }
15235
15236 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15237 SDNode *BasePtr = Add->getOperand(0).getNode();
15238 for (SDNode::use_iterator UI = BasePtr->use_begin(),
15239 UE = BasePtr->use_end(); UI != UE; ++UI) {
15240 if (UI->getOpcode() == ISD::ADD &&
15241 isa<ConstantSDNode>(UI->getOperand(1)) &&
15242 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15243 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15244 (1ULL << Bits) == 0) {
15245 SDNode *OtherAdd = *UI;
15246 for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15247 VE = OtherAdd->use_end(); VI != VE; ++VI) {
15248 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15249 cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15250 return SDValue(*VI, 0);
15251 }
15252 }
15253 }
15254 }
15255 }
15256 }
15257
15258 // Combine vmaxsw/h/b(a, a's negation) into abs(a), which also
15259 // exposes the vabsduw/h/b opportunity for downstream combines.
15260 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15261 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15262 IID == Intrinsic::ppc_altivec_vmaxsh ||
15263 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15264 SDValue V1 = N->getOperand(1);
15265 SDValue V2 = N->getOperand(2);
15266 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15267 V1.getSimpleValueType() == MVT::v8i16 ||
15268 V1.getSimpleValueType() == MVT::v16i8) &&
15269 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15270 // (0-a, a)
15271 if (V1.getOpcode() == ISD::SUB &&
15272 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15273 V1.getOperand(1) == V2) {
15274 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15275 }
15276 // (a, 0-a)
15277 if (V2.getOpcode() == ISD::SUB &&
15278 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15279 V2.getOperand(1) == V1) {
15280 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15281 }
15282 // (x-y, y-x)
15283 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15284 V1.getOperand(0) == V2.getOperand(1) &&
15285 V1.getOperand(1) == V2.getOperand(0)) {
15286 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15287 }
15288 }
15289 }
15290 }
15291
15292 break;
15293 case ISD::INTRINSIC_W_CHAIN:
15294 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15295 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15296 if (Subtarget.needsSwapsForVSXMemOps()) {
15297 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15298 default:
15299 break;
15300 case Intrinsic::ppc_vsx_lxvw4x:
15301 case Intrinsic::ppc_vsx_lxvd2x:
15302 return expandVSXLoadForLE(N, DCI);
15303 }
15304 }
15305 break;
15306 case ISD::INTRINSIC_VOID:
15307 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15308 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15309 if (Subtarget.needsSwapsForVSXMemOps()) {
15310 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15311 default:
15312 break;
15313 case Intrinsic::ppc_vsx_stxvw4x:
15314 case Intrinsic::ppc_vsx_stxvd2x:
15315 return expandVSXStoreForLE(N, DCI);
15316 }
15317 }
15318 break;
15319 case ISD::BSWAP:
15320 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
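// Sketch: (i32 (bswap (load p))) becomes a single lwbrx of p, and the
// i16 form becomes lhbrx plus the truncate inserted below; the exact
// nodes are built in the code that follows.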
15321 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15322 N->getOperand(0).hasOneUse() &&
15323 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15324 (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15325 N->getValueType(0) == MVT::i64))) {
15326 SDValue Load = N->getOperand(0);
15327 LoadSDNode *LD = cast<LoadSDNode>(Load);
15328 // Create the byte-swapping load.
15329 SDValue Ops[] = {
15330 LD->getChain(), // Chain
15331 LD->getBasePtr(), // Ptr
15332 DAG.getValueType(N->getValueType(0)) // VT
15333 };
15334 SDValue BSLoad =
15335 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15336 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15337 MVT::i64 : MVT::i32, MVT::Other),
15338 Ops, LD->getMemoryVT(), LD->getMemOperand());
15339
15340 // If this is an i16 load, insert the truncate.
15341 SDValue ResVal = BSLoad;
15342 if (N->getValueType(0) == MVT::i16)
15343 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15344
15345 // First, combine the bswap away. This makes the value produced by the
15346 // load dead.
15347 DCI.CombineTo(N, ResVal);
15348
15349 // Next, combine the load away; we give it a bogus result value but a real
15350 // chain result. The result value is dead because the bswap is dead.
15351 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15352
15353 // Return N so it doesn't get rechecked!
15354 return SDValue(N, 0);
15355 }
15356 break;
15357 case PPCISD::VCMP:
15358 // If a VCMPo node already exists with exactly the same operands as this
15359 // node, use its result instead of this node (VCMPo computes both a CR6 and
15360 // a normal output).
15361 //
15362 if (!N->getOperand(0).hasOneUse() &&
15363 !N->getOperand(1).hasOneUse() &&
15364 !N->getOperand(2).hasOneUse()) {
15365
15366 // Scan all of the users of the LHS, looking for VCMPo's that match.
15367 SDNode *VCMPoNode = nullptr;
15368
15369 SDNode *LHSN = N->getOperand(0).getNode();
15370 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15371 UI != E; ++UI)
15372 if (UI->getOpcode() == PPCISD::VCMPo &&
15373 UI->getOperand(1) == N->getOperand(1) &&
15374 UI->getOperand(2) == N->getOperand(2) &&
15375 UI->getOperand(0) == N->getOperand(0)) {
15376 VCMPoNode = *UI;
15377 break;
15378 }
15379
15380 // If there is no VCMPo node, or if the flag value has a single use, don't
15381 // transform this.
15382 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
15383 break;
15384
15385 // Look at the (necessarily single) use of the flag value. If it has a
15386 // chain, this transformation is more complex. Note that multiple things
15387 // could use the value result, which we should ignore.
15388 SDNode *FlagUser = nullptr;
15389 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
15390 FlagUser == nullptr; ++UI) {
15391 assert(UI != VCMPoNode->use_end() && "Didn't find user!");
15392 SDNode *User = *UI;
15393 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15394 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
15395 FlagUser = User;
15396 break;
15397 }
15398 }
15399 }
15400
15401 // If the user is a MFOCRF instruction, we know this is safe.
15402 // Otherwise we give up for right now.
15403 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15404 return SDValue(VCMPoNode, 0);
15405 }
15406 break;
15407 case ISD::BRCOND: {
15408 SDValue Cond = N->getOperand(1);
15409 SDValue Target = N->getOperand(2);
15410
15411 if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15412 cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15413 Intrinsic::loop_decrement) {
15414
15415 // We now need to make the intrinsic dead (it cannot be instruction
15416 // selected).
15417 DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15418 assert(Cond.getNode()->hasOneUse() &&
15419 "Counter decrement has more than one use");
15420
15421 return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15422 N->getOperand(0), Target);
15423 }
15424 }
15425 break;
15426 case ISD::BR_CC: {
15427 // If this is a branch on an altivec predicate comparison, lower this so
15428 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15429 // lowering is done pre-legalize, because the legalizer lowers the predicate
15430 // compare down to code that is difficult to reassemble.
15431 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15432 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15433
15434 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15435 // value. If so, pass through the AND to get to the intrinsic.
15436 if (LHS.getOpcode() == ISD::AND &&
15437 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15438 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15439 Intrinsic::loop_decrement &&
15440 isa<ConstantSDNode>(LHS.getOperand(1)) &&
15441 !isNullConstant(LHS.getOperand(1)))
15442 LHS = LHS.getOperand(0);
15443
15444 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15445 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15446 Intrinsic::loop_decrement &&
15447 isa<ConstantSDNode>(RHS)) {
15448 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15449 "Counter decrement comparison is not EQ or NE");
15450
15451 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15452 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15453 (CC == ISD::SETNE && !Val);
15454
15455 // We now need to make the intrinsic dead (it cannot be instruction
15456 // selected).
15457 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15458 assert(LHS.getNode()->hasOneUse() &&
15459 "Counter decrement has more than one use");
15460
15461 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15462 N->getOperand(0), N->getOperand(4));
15463 }
15464
15465 int CompareOpc;
15466 bool isDot;
15467
15468 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15469 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15470 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15471 assert(isDot && "Can't compare against a vector result!");
15472
15473 // If this is a comparison against something other than 0/1, then we know
15474 // that the condition is never/always true.
15475 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15476 if (Val != 0 && Val != 1) {
15477 if (CC == ISD::SETEQ) // Cond never true, remove branch.
15478 return N->getOperand(0);
15479 // Always !=, turn it into an unconditional branch.
15480 return DAG.getNode(ISD::BR, dl, MVT::Other,
15481 N->getOperand(0), N->getOperand(4));
15482 }
15483
15484 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15485
15486 // Create the PPCISD altivec 'dot' comparison node.
15487 SDValue Ops[] = {
15488 LHS.getOperand(2), // LHS of compare
15489 LHS.getOperand(3), // RHS of compare
15490 DAG.getConstant(CompareOpc, dl, MVT::i32)
15491 };
15492 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15493 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
15494
15495 // Unpack the result based on how the target uses it.
15496 PPC::Predicate CompOpc;
15497 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15498 default: // Can't happen, don't crash on invalid number though.
15499 case 0: // Branch on the value of the EQ bit of CR6.
15500 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15501 break;
15502 case 1: // Branch on the inverted value of the EQ bit of CR6.
15503 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15504 break;
15505 case 2: // Branch on the value of the LT bit of CR6.
15506 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15507 break;
15508 case 3: // Branch on the inverted value of the LT bit of CR6.
15509 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15510 break;
15511 }
15512
15513 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15514 DAG.getConstant(CompOpc, dl, MVT::i32),
15515 DAG.getRegister(PPC::CR6, MVT::i32),
15516 N->getOperand(4), CompNode.getValue(1));
15517 }
15518 break;
15519 }
15520 case ISD::BUILD_VECTOR:
15521 return DAGCombineBuildVector(N, DCI);
15522 case ISD::ABS:
15523 return combineABS(N, DCI);
15524 case ISD::VSELECT:
15525 return combineVSelect(N, DCI);
15526 }
15527
15528 return SDValue();
15529}
15530
15531SDValue
15532 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15533 SelectionDAG &DAG,
15534 SmallVectorImpl<SDNode *> &Created) const {
15535 // fold (sdiv X, pow2)
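// Illustrative mapping: (sdiv X, 4) becomes (SRA_ADDZE X, 2), i.e. an
// arithmetic shift right plus addze to round toward zero; for a divisor
// of -4 the result is additionally negated below.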
15536 EVT VT = N->getValueType(0);
15537 if (VT == MVT::i64 && !Subtarget.isPPC64())
15538 return SDValue();
15539 if ((VT != MVT::i32 && VT != MVT::i64) ||
15540 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15541 return SDValue();
15542
15543 SDLoc DL(N);
15544 SDValue N0 = N->getOperand(0);
15545
15546 bool IsNegPow2 = (-Divisor).isPowerOf2();
15547 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15548 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15549
15550 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15551 Created.push_back(Op.getNode());
15552
15553 if (IsNegPow2) {
15554 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15555 Created.push_back(Op.getNode());
15556 }
15557
15558 return Op;
15559}
15560
15561//===----------------------------------------------------------------------===//
15562// Inline Assembly Support
15563//===----------------------------------------------------------------------===//
15564
15565 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15566 KnownBits &Known,
15567 const APInt &DemandedElts,
15568 const SelectionDAG &DAG,
15569 unsigned Depth) const {
15570 Known.resetAll();
15571 switch (Op.getOpcode()) {
15572 default: break;
15573 case PPCISD::LBRX: {
15574 // lhbrx is known to have the top bits cleared out.
15575 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15576 Known.Zero = 0xFFFF0000;
15577 break;
15578 }
15579 case ISD::INTRINSIC_WO_CHAIN: {
15580 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15581 default: break;
15582 case Intrinsic::ppc_altivec_vcmpbfp_p:
15583 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15584 case Intrinsic::ppc_altivec_vcmpequb_p:
15585 case Intrinsic::ppc_altivec_vcmpequh_p:
15586 case Intrinsic::ppc_altivec_vcmpequw_p:
15587 case Intrinsic::ppc_altivec_vcmpequd_p:
15588 case Intrinsic::ppc_altivec_vcmpgefp_p:
15589 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15590 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15591 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15592 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15593 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15594 case Intrinsic::ppc_altivec_vcmpgtub_p:
15595 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15596 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15597 case Intrinsic::ppc_altivec_vcmpgtud_p:
15598 Known.Zero = ~1U; // All bits but the low one are known to be zero.
15599 break;
15600 }
15601 }
15602 }
15603}
15604
15605 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15606 switch (Subtarget.getCPUDirective()) {
15607 default: break;
15608 case PPC::DIR_970:
15609 case PPC::DIR_PWR4:
15610 case PPC::DIR_PWR5:
15611 case PPC::DIR_PWR5X:
15612 case PPC::DIR_PWR6:
15613 case PPC::DIR_PWR6X:
15614 case PPC::DIR_PWR7:
15615 case PPC::DIR_PWR8:
15616 case PPC::DIR_PWR9:
15617 case PPC::DIR_PWR10:
15618 case PPC::DIR_PWR_FUTURE: {
15619 if (!ML)
15620 break;
15621
15623 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
15624 // so that we can decrease cache misses and branch-prediction misses.
15625 // Actual alignment of the loop will depend on the hotness check and other
15626 // logic in alignBlocks.
15627 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15628 return Align(32);
15629 }
15630
15631 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15632
15633 // For small loops (between 5 and 8 instructions), align to a 32-byte
15634 // boundary so that the entire loop fits in one instruction-cache line.
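// For example, a 7-instruction loop occupies 28 bytes; aligned to 32
// bytes it fits in a single 32-byte fetch block, whereas a less aligned
// placement could split it across two blocks. (Illustrative sizes only.)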
15635 uint64_t LoopSize = 0;
15636 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15637 for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15638 LoopSize += TII->getInstSizeInBytes(*J);
15639 if (LoopSize > 32)
15640 break;
15641 }
15642
15643 if (LoopSize > 16 && LoopSize <= 32)
15644 return Align(32);
15645
15646 break;
15647 }
15648 }
15649
15650 return TargetLowering::getPrefLoopAlignment(ML);
15651}
15652
15653/// getConstraintType - Given a constraint, return the type of
15654/// constraint it is for this target.
15655 PPCTargetLowering::ConstraintType
15656 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15657 if (Constraint.size() == 1) {
15658 switch (Constraint[0]) {
15659 default: break;
15660 case 'b':
15661 case 'r':
15662 case 'f':
15663 case 'd':
15664 case 'v':
15665 case 'y':
15666 return C_RegisterClass;
15667 case 'Z':
15668 // FIXME: While Z does indicate a memory constraint, it specifically
15669 // indicates an r+r address (used in conjunction with the 'y' modifier
15670 // in the replacement string). Currently, we're forcing the base
15671 // register to be r0 in the asm printer (which is interpreted as zero)
15672 // and forming the complete address in the second register. This is
15673 // suboptimal.
15674 return C_Memory;
15675 }
15676 } else if (Constraint == "wc") { // individual CR bits.
15677 return C_RegisterClass;
15678 } else if (Constraint == "wa" || Constraint == "wd" ||
15679 Constraint == "wf" || Constraint == "ws" ||
15680 Constraint == "wi" || Constraint == "ww") {
15681 return C_RegisterClass; // VSX registers.
15682 }
15683 return TargetLowering::getConstraintType(Constraint);
15684}
15685
15686/// Examine constraint type and operand type and determine a weight value.
15687/// This object must already have been set up with the operand type
15688/// and the current alternative constraint selected.
15689 TargetLowering::ConstraintWeight
15690 PPCTargetLowering::getSingleConstraintMatchWeight(
15691 AsmOperandInfo &info, const char *constraint) const {
15692 ConstraintWeight weight = CW_Invalid;
15693 Value *CallOperandVal = info.CallOperandVal;
15694 // If we don't have a value, we can't do a match,
15695 // but allow it at the lowest weight.
15696 if (!CallOperandVal)
15697 return CW_Default;
15698 Type *type = CallOperandVal->getType();
15699
15700 // Look at the constraint type.
15701 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15702 return CW_Register; // an individual CR bit.
15703 else if ((StringRef(constraint) == "wa" ||
15704 StringRef(constraint) == "wd" ||
15705 StringRef(constraint) == "wf") &&
15706 type->isVectorTy())
15707 return CW_Register;
15708 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15709 return CW_Register; // holds 64-bit integer data.
15710 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15711 return CW_Register;
15712 else if (StringRef(constraint) == "ww" && type->isFloatTy())
15713 return CW_Register;
15714
15715 switch (*constraint) {
15716 default:
15717 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15718 break;
15719 case 'b':
15720 if (type->isIntegerTy())
15721 weight = CW_Register;
15722 break;
15723 case 'f':
15724 if (type->isFloatTy())
15725 weight = CW_Register;
15726 break;
15727 case 'd':
15728 if (type->isDoubleTy())
15729 weight = CW_Register;
15730 break;
15731 case 'v':
15732 if (type->isVectorTy())
15733 weight = CW_Register;
15734 break;
15735 case 'y':
15736 weight = CW_Register;
15737 break;
15738 case 'Z':
15739 weight = CW_Memory;
15740 break;
15741 }
15742 return weight;
15743}
15744
15745std::pair<unsigned, const TargetRegisterClass *>
15746 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15747 StringRef Constraint,
15748 MVT VT) const {
15749 if (Constraint.size() == 1) {
15750 // GCC RS6000 Constraint Letters
15751 switch (Constraint[0]) {
15752 case 'b': // R1-R31
15753 if (VT == MVT::i64 && Subtarget.isPPC64())
15754 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15755 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15756 case 'r': // R0-R31
15757 if (VT == MVT::i64 && Subtarget.isPPC64())
15758 return std::make_pair(0U, &PPC::G8RCRegClass);
15759 return std::make_pair(0U, &PPC::GPRCRegClass);
15760 // 'd' and 'f' constraints are both defined to be "the floating point
15761 // registers", where one is for 32-bit and the other for 64-bit. We don't
15762 // really care overly much here so just give them all the same reg classes.
15763 case 'd':
15764 case 'f':
15765 if (Subtarget.hasSPE()) {
15766 if (VT == MVT::f32 || VT == MVT::i32)
15767 return std::make_pair(0U, &PPC::GPRCRegClass);
15768 if (VT == MVT::f64 || VT == MVT::i64)
15769 return std::make_pair(0U, &PPC::SPERCRegClass);
15770 } else {
15771 if (VT == MVT::f32 || VT == MVT::i32)
15772 return std::make_pair(0U, &PPC::F4RCRegClass);
15773 if (VT == MVT::f64 || VT == MVT::i64)
15774 return std::make_pair(0U, &PPC::F8RCRegClass);
15775 if (VT == MVT::v4f64 && Subtarget.hasQPX())
15776 return std::make_pair(0U, &PPC::QFRCRegClass);
15777 if (VT == MVT::v4f32 && Subtarget.hasQPX())
15778 return std::make_pair(0U, &PPC::QSRCRegClass);
15779 }
15780 break;
15781 case 'v':
15782 if (VT == MVT::v4f64 && Subtarget.hasQPX())
15783 return std::make_pair(0U, &PPC::QFRCRegClass);
15784 if (VT == MVT::v4f32 && Subtarget.hasQPX())
15785 return std::make_pair(0U, &PPC::QSRCRegClass);
15786 if (Subtarget.hasAltivec())
15787 return std::make_pair(0U, &PPC::VRRCRegClass);
15788 break;
15789 case 'y': // crrc
15790 return std::make_pair(0U, &PPC::CRRCRegClass);
15791 }
15792 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15793 // An individual CR bit.
15794 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15795 } else if ((Constraint == "wa" || Constraint == "wd" ||
15796 Constraint == "wf" || Constraint == "wi") &&
15797 Subtarget.hasVSX()) {
15798 return std::make_pair(0U, &PPC::VSRCRegClass);
15799 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15800 if (VT == MVT::f32 && Subtarget.hasP8Vector())
15801 return std::make_pair(0U, &PPC::VSSRCRegClass);
15802 else
15803 return std::make_pair(0U, &PPC::VSFRCRegClass);
15804 }
15805
15806 // If we name a VSX register, we can't defer to the base class because it
15807 // will not recognize the correct register (their names will be VSL{0-31}
15808 // and V{0-31} so they won't match). So we match them here.
15809 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15810 int VSNum = atoi(Constraint.data() + 3);
15811 assert(VSNum >= 0 && VSNum <= 63 &&
15812 "Attempted to access a vsr out of range");
15813 if (VSNum < 32)
15814 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15815 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15816 }
15817 std::pair<unsigned, const TargetRegisterClass *> R =
15818 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15819
15820 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15821 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15822 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15823 // register.
15824 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15825 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15826 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15827 PPC::GPRCRegClass.contains(R.first))
15828 return std::make_pair(TRI->getMatchingSuperReg(R.first,
15829 PPC::sub_32, &PPC::G8RCRegClass),
15830 &PPC::G8RCRegClass);
15831
15832 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15833 if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15834 R.first = PPC::CR0;
15835 R.second = &PPC::CRRCRegClass;
15836 }
15837
15838 return R;
15839}
15840
15841/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15842/// vector. If it is invalid, don't add anything to Ops.
15843 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15844 std::string &Constraint,
15845 std::vector<SDValue>&Ops,
15846 SelectionDAG &DAG) const {
15847 SDValue Result;
15848
15849 // Only support length 1 constraints.
15850 if (Constraint.length() > 1) return;
15851
15852 char Letter = Constraint[0];
15853 switch (Letter) {
15854 default: break;
15855 case 'I':
15856 case 'J':
15857 case 'K':
15858 case 'L':
15859 case 'M':
15860 case 'N':
15861 case 'O':
15862 case 'P': {
15863 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15864 if (!CST) return; // Must be an immediate to match.
15865 SDLoc dl(Op);
15866 int64_t Value = CST->getSExtValue();
15867 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15868 // numbers are printed as such.
15869 switch (Letter) {
15870 default: llvm_unreachable("Unknown constraint letter!");
15871 case 'I': // "I" is a signed 16-bit constant.
15872 if (isInt<16>(Value))
15873 Result = DAG.getTargetConstant(Value, dl, TCVT);
15874 break;
15875 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15876 if (isShiftedUInt<16, 16>(Value))
15877 Result = DAG.getTargetConstant(Value, dl, TCVT);
15878 break;
15879 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15880 if (isShiftedInt<16, 16>(Value))
15881 Result = DAG.getTargetConstant(Value, dl, TCVT);
15882 break;
15883 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15884 if (isUInt<16>(Value))
15885 Result = DAG.getTargetConstant(Value, dl, TCVT);
15886 break;
15887 case 'M': // "M" is a constant that is greater than 31.
15888 if (Value > 31)
15889 Result = DAG.getTargetConstant(Value, dl, TCVT);
15890 break;
15891 case 'N': // "N" is a positive constant that is an exact power of two.
15892 if (Value > 0 && isPowerOf2_64(Value))
15893 Result = DAG.getTargetConstant(Value, dl, TCVT);
15894 break;
15895 case 'O': // "O" is the constant zero.
15896 if (Value == 0)
15897 Result = DAG.getTargetConstant(Value, dl, TCVT);
15898 break;
15899 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15900 if (isInt<16>(-Value))
15901 Result = DAG.getTargetConstant(Value, dl, TCVT);
15902 break;
15903 }
15904 break;
15905 }
15906 }
15907
15908 if (Result.getNode()) {
15909 Ops.push_back(Result);
15910 return;
15911 }
15912
15913 // Handle standard constraint letters.
15914 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15915}
15916
15917// isLegalAddressingMode - Return true if the addressing mode represented
15918// by AM is legal for this target, for a load/store of the specified type.
15919 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15920 const AddrMode &AM, Type *Ty,
15921 unsigned AS, Instruction *I) const {
15922 // PPC does not allow r+i addressing modes for vectors!
15923 if (Ty->isVectorTy() && AM.BaseOffs != 0)
15924 return false;
15925
15926 // PPC allows a sign-extended 16-bit immediate field.
15927 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15928 return false;
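// e.g. a base offset of 32000 fits the signed 16-bit displacement of a
// D-form access and is legal, while 70000 would have to be materialized
// into a register first. (Illustrative values.)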
15929
15930 // No global is ever allowed as a base.
15931 if (AM.BaseGV)
15932 return false;
15933
15934 // PPC only supports r+r addressing:
15935 switch (AM.Scale) {
15936 case 0: // "r+i" or just "i", depending on HasBaseReg.
15937 break;
15938 case 1:
15939 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15940 return false;
15941 // Otherwise we have r+r or r+i.
15942 break;
15943 case 2:
15944 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15945 return false;
15946 // Allow 2*r as r+r.
15947 break;
15948 default:
15949 // No other scales are supported.
15950 return false;
15951 }
15952
15953 return true;
15954}
15955
15956SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15957 SelectionDAG &DAG) const {
15958 MachineFunction &MF = DAG.getMachineFunction();
15959 MachineFrameInfo &MFI = MF.getFrameInfo();
15960 MFI.setReturnAddressIsTaken(true);
15961
15962 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15963 return SDValue();
15964
15965 SDLoc dl(Op);
15966 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15967
15968 // Make sure the function does not optimize away the store of the RA to
15969 // the stack.
15970 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15971 FuncInfo->setLRStoreRequired();
15972 bool isPPC64 = Subtarget.isPPC64();
15973 auto PtrVT = getPointerTy(MF.getDataLayout());
15974
15975 if (Depth > 0) {
15976 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15977 SDValue Offset =
15978 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15979 isPPC64 ? MVT::i64 : MVT::i32);
15980 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15981 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15982 MachinePointerInfo());
15983 }
15984
15985 // Just load the return address off the stack.
15986 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15987 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15988 MachinePointerInfo());
15989}
15990
15991SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15992 SelectionDAG &DAG) const {
15993 SDLoc dl(Op);
15994 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15995
15996 MachineFunction &MF = DAG.getMachineFunction();
15997 MachineFrameInfo &MFI = MF.getFrameInfo();
15998 MFI.setFrameAddressIsTaken(true);
15999
16000 EVT PtrVT = getPointerTy(MF.getDataLayout());
16001 bool isPPC64 = PtrVT == MVT::i64;
16002
16003 // Naked functions never have a frame pointer, and so we use r1. For all
16004 // other functions, this decision must be delayed until during PEI.
16005 unsigned FrameReg;
16006 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16007 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16008 else
16009 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16010
16011 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16012 PtrVT);
16013 while (Depth--)
16014 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16015 FrameAddr, MachinePointerInfo());
16016 return FrameAddr;
16017}
16018
16019// FIXME? Maybe this could be a TableGen attribute on some registers and
16020// this table could be generated automatically from RegInfo.
16021 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16022 const MachineFunction &MF) const {
16023 bool isPPC64 = Subtarget.isPPC64();
16024
16025 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16026 if (!is64Bit && VT != LLT::scalar(32))
16027 report_fatal_error("Invalid register global variable type");
16028
16030 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16031 .Case("r2", isPPC64 ? Register() : PPC::R2)
16032 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16033 .Default(Register());
16034
16035 if (Reg)
16036 return Reg;
16037 report_fatal_error("Invalid register name global variable");
16038}
16039
16040 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16041 // The 32-bit SVR4 ABI accesses everything as got-indirect.
16042 if (Subtarget.is32BitELFABI())
16043 return true;
16044
16045 // AIX accesses everything indirectly through the TOC, which is similar to
16046 // the GOT.
16047 if (Subtarget.isAIXABI())
16048 return true;
16049
16050 CodeModel::Model CModel = getTargetMachine().getCodeModel();
16051 // If it is small or large code model, module locals are accessed
16052 // indirectly by loading their address from .toc/.got.
16053 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16054 return true;
16055
16056 // JumpTable and BlockAddress are accessed as got-indirect.
16057 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16058 return true;
16059
16060 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16061 return Subtarget.isGVIndirectSymbol(G->getGlobal());
16062
16063 return false;
16064}
16065
16066bool
16067 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16068 // The PowerPC target isn't yet aware of offsets.
16069 return false;
16070}
16071
16072 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16073 const CallInst &I,
16074 MachineFunction &MF,
16075 unsigned Intrinsic) const {
16076 switch (Intrinsic) {
16077 case Intrinsic::ppc_qpx_qvlfd:
16078 case Intrinsic::ppc_qpx_qvlfs:
16079 case Intrinsic::ppc_qpx_qvlfcd:
16080 case Intrinsic::ppc_qpx_qvlfcs:
16081 case Intrinsic::ppc_qpx_qvlfiwa:
16082 case Intrinsic::ppc_qpx_qvlfiwz:
16083 case Intrinsic::ppc_altivec_lvx:
16084 case Intrinsic::ppc_altivec_lvxl:
16085 case Intrinsic::ppc_altivec_lvebx:
16086 case Intrinsic::ppc_altivec_lvehx:
16087 case Intrinsic::ppc_altivec_lvewx:
16088 case Intrinsic::ppc_vsx_lxvd2x:
16089 case Intrinsic::ppc_vsx_lxvw4x: {
16090 EVT VT;
16091 switch (Intrinsic) {
16092 case Intrinsic::ppc_altivec_lvebx:
16093 VT = MVT::i8;
16094 break;
16095 case Intrinsic::ppc_altivec_lvehx:
16096 VT = MVT::i16;
16097 break;
16098 case Intrinsic::ppc_altivec_lvewx:
16099 VT = MVT::i32;
16100 break;
16101 case Intrinsic::ppc_vsx_lxvd2x:
16102 VT = MVT::v2f64;
16103 break;
16104 case Intrinsic::ppc_qpx_qvlfd:
16105 VT = MVT::v4f64;
16106 break;
16107 case Intrinsic::ppc_qpx_qvlfs:
16108 VT = MVT::v4f32;
16109 break;
16110 case Intrinsic::ppc_qpx_qvlfcd:
16111 VT = MVT::v2f64;
16112 break;
16113 case Intrinsic::ppc_qpx_qvlfcs:
16114 VT = MVT::v2f32;
16115 break;
16116 default:
16117 VT = MVT::v4i32;
16118 break;
16119 }
16120
16121 Info.opc = ISD::INTRINSIC_W_CHAIN;
16122 Info.memVT = VT;
16123 Info.ptrVal = I.getArgOperand(0);
16124 Info.offset = -VT.getStoreSize()+1;
16125 Info.size = 2*VT.getStoreSize()-1;
16126 Info.align = Align(1);
16127 Info.flags = MachineMemOperand::MOLoad;
16128 return true;
16129 }
16130 case Intrinsic::ppc_qpx_qvlfda:
16131 case Intrinsic::ppc_qpx_qvlfsa:
16132 case Intrinsic::ppc_qpx_qvlfcda:
16133 case Intrinsic::ppc_qpx_qvlfcsa:
16134 case Intrinsic::ppc_qpx_qvlfiwaa:
16135 case Intrinsic::ppc_qpx_qvlfiwza: {
16136 EVT VT;
16137 switch (Intrinsic) {
16138 case Intrinsic::ppc_qpx_qvlfda:
16139 VT = MVT::v4f64;
16140 break;
16141 case Intrinsic::ppc_qpx_qvlfsa:
16142 VT = MVT::v4f32;
16143 break;
16144 case Intrinsic::ppc_qpx_qvlfcda:
16145 VT = MVT::v2f64;
16146 break;
16147 case Intrinsic::ppc_qpx_qvlfcsa:
16148 VT = MVT::v2f32;
16149 break;
16150 default:
16151 VT = MVT::v4i32;
16152 break;
16153 }
16154
16155 Info.opc = ISD::INTRINSIC_W_CHAIN;
16156 Info.memVT = VT;
16157 Info.ptrVal = I.getArgOperand(0);
16158 Info.offset = 0;
16159 Info.size = VT.getStoreSize();
16160 Info.align = Align(1);
16161 Info.flags = MachineMemOperand::MOLoad;
16162 return true;
16163 }
16164 case Intrinsic::ppc_qpx_qvstfd:
16165 case Intrinsic::ppc_qpx_qvstfs:
16166 case Intrinsic::ppc_qpx_qvstfcd:
16167 case Intrinsic::ppc_qpx_qvstfcs:
16168 case Intrinsic::ppc_qpx_qvstfiw:
16169 case Intrinsic::ppc_altivec_stvx:
16170 case Intrinsic::ppc_altivec_stvxl:
16171 case Intrinsic::ppc_altivec_stvebx:
16172 case Intrinsic::ppc_altivec_stvehx:
16173 case Intrinsic::ppc_altivec_stvewx:
16174 case Intrinsic::ppc_vsx_stxvd2x:
16175 case Intrinsic::ppc_vsx_stxvw4x: {
16176 EVT VT;
16177 switch (Intrinsic) {
16178 case Intrinsic::ppc_altivec_stvebx:
16179 VT = MVT::i8;
16180 break;
16181 case Intrinsic::ppc_altivec_stvehx:
16182 VT = MVT::i16;
16183 break;
16184 case Intrinsic::ppc_altivec_stvewx:
16185 VT = MVT::i32;
16186 break;
16187 case Intrinsic::ppc_vsx_stxvd2x:
16188 VT = MVT::v2f64;
16189 break;
16190 case Intrinsic::ppc_qpx_qvstfd:
16191 VT = MVT::v4f64;
16192 break;
16193 case Intrinsic::ppc_qpx_qvstfs:
16194 VT = MVT::v4f32;
16195 break;
16196 case Intrinsic::ppc_qpx_qvstfcd:
16197 VT = MVT::v2f64;
16198 break;
16199 case Intrinsic::ppc_qpx_qvstfcs:
16200 VT = MVT::v2f32;
16201 break;
16202 default:
16203 VT = MVT::v4i32;
16204 break;
16205 }
16206
16207 Info.opc = ISD::INTRINSIC_VOID;
16208 Info.memVT = VT;
16209 Info.ptrVal = I.getArgOperand(1);
16210 Info.offset = -VT.getStoreSize()+1;
16211 Info.size = 2*VT.getStoreSize()-1;
16212 Info.align = Align(1);
16213 Info.flags = MachineMemOperand::MOStore;
16214 return true;
16215 }
16216 case Intrinsic::ppc_qpx_qvstfda:
16217 case Intrinsic::ppc_qpx_qvstfsa:
16218 case Intrinsic::ppc_qpx_qvstfcda:
16219 case Intrinsic::ppc_qpx_qvstfcsa:
16220 case Intrinsic::ppc_qpx_qvstfiwa: {
16221 EVT VT;
16222 switch (Intrinsic) {
16223 case Intrinsic::ppc_qpx_qvstfda:
16224 VT = MVT::v4f64;
16225 break;
16226 case Intrinsic::ppc_qpx_qvstfsa:
16227 VT = MVT::v4f32;
16228 break;
16229 case Intrinsic::ppc_qpx_qvstfcda:
16230 VT = MVT::v2f64;
16231 break;
16232 case Intrinsic::ppc_qpx_qvstfcsa:
16233 VT = MVT::v2f32;
16234 break;
16235 default:
16236 VT = MVT::v4i32;
16237 break;
16238 }
16239
16240 Info.opc = ISD::INTRINSIC_VOID;
16241 Info.memVT = VT;
16242 Info.ptrVal = I.getArgOperand(1);
16243 Info.offset = 0;
16244 Info.size = VT.getStoreSize();
16245 Info.align = Align(1);
16246 Info.flags = MachineMemOperand::MOStore;
16247 return true;
16248 }
16249 default:
16250 break;
16251 }
16252
16253 return false;
16254}
16255
16256/// It returns EVT::Other if the type should be determined using generic
16257/// target-independent logic.
16258 EVT PPCTargetLowering::getOptimalMemOpType(
16259 const MemOp &Op, const AttributeList &FuncAttributes) const {
16260 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16261 // When expanding a memset, require at least two QPX instructions to cover
16262 // the cost of loading the value to be stored from the constant pool.
16263 if (Subtarget.hasQPX() && Op.size() >= 32 &&
16264 (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
16265 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
16266 return MVT::v4f64;
16267 }
16268
16269 // We should use Altivec/VSX loads and stores when available. For unaligned
16270 // addresses, unaligned VSX loads are only fast starting with the P8.
16271 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16272 (Op.isAligned(Align(16)) ||
16273 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16274 return MVT::v4i32;
16275 }
16276
16277 if (Subtarget.isPPC64()) {
16278 return MVT::i64;
16279 }
16280
16281 return MVT::i32;
16282}
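// Rough effect of the above: large 32-byte-aligned copies are expanded
// with v4f64/v4i32 chunks when QPX/Altivec apply, and everything else
// falls back to i64 (on PPC64) or i32 moves.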
16283
16284/// Returns true if it is beneficial to convert a load of a constant
16285/// to just the constant itself.
16286 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16287 Type *Ty) const {
16288 assert(Ty->isIntegerTy());
16289
16290 unsigned BitSize = Ty->getPrimitiveSizeInBits();
16291 return !(BitSize == 0 || BitSize > 64);
16292}
16293
16294 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16295 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16296 return false;
16297 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16298 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16299 return NumBits1 == 64 && NumBits2 == 32;
16300}
16301
16302 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16303 if (!VT1.isInteger() || !VT2.isInteger())
16304 return false;
16305 unsigned NumBits1 = VT1.getSizeInBits();
16306 unsigned NumBits2 = VT2.getSizeInBits();
16307 return NumBits1 == 64 && NumBits2 == 32;
16308}
16309
16310 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16311 // Generally speaking, zexts are not free, but they are free when they can be
16312 // folded with other operations.
16313 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16314 EVT MemVT = LD->getMemoryVT();
16315 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16316 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16317 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16318 LD->getExtensionType() == ISD::ZEXTLOAD))
16319 return true;
16320 }
16321
16322 // FIXME: Add other cases...
16323 // - 32-bit shifts with a zext to i64
16324 // - zext after ctlz, bswap, etc.
16325 // - zext after and by a constant mask
16326
16327 return TargetLowering::isZExtFree(Val, VT2);
16328}
16329
16330 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16331 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16332 "invalid fpext types");
16333 // Extending to float128 is not free.
16334 if (DestVT == MVT::f128)
16335 return false;
16336 return true;
16337}
16338
16339 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16340 return isInt<16>(Imm) || isUInt<16>(Imm);
16341}
16342
16343 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16344 return isInt<16>(Imm) || isUInt<16>(Imm);
16345}
16346
16347 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16348 unsigned,
16349 unsigned,
16350 MachineMemOperand::Flags,
16351 bool *Fast) const {
16352 if (DisablePPCUnaligned)
16353 return false;
16354
16355 // PowerPC supports unaligned memory access for simple non-vector types.
16356 // Although accessing unaligned addresses is not as efficient as accessing
16357 // aligned addresses, it is generally more efficient than manual expansion,
16358 // and generally only traps for software emulation when crossing page
16359 // boundaries.
16360
16361 if (!VT.isSimple())
16362 return false;
16363
16364 if (VT.isFloatingPoint() && !VT.isVector() &&
16365 !Subtarget.allowsUnalignedFPAccess())
16366 return false;
16367
16368 if (VT.getSimpleVT().isVector()) {
16369 if (Subtarget.hasVSX()) {
16370 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16371 VT != MVT::v4f32 && VT != MVT::v4i32)
16372 return false;
16373 } else {
16374 return false;
16375 }
16376 }
16377
16378 if (VT == MVT::ppcf128)
16379 return false;
16380
16381 if (Fast)
16382 *Fast = true;
16383
16384 return true;
16385}
16386
16387 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16388 EVT VT) const {
16389 return isFMAFasterThanFMulAndFAdd(
16390 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16391 }
16392
16393 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &Func,
16394 Type *Ty) const {
16395 switch (Ty->getScalarType()->getTypeID()) {
16396 case Type::FloatTyID:
16397 case Type::DoubleTyID:
16398 return true;
16399 case Type::FP128TyID:
16400 return Subtarget.hasP9Vector();
16401 default:
16402 return false;
16403 }
16404}
16405
16406// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
16407// FIXME: add more patterns which are profitable to hoist.
16408 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16409 if (I->getOpcode() != Instruction::FMul)
16410 return true;
16411
16412 if (!I->hasOneUse())
16413 return true;
16414
16415 const Instruction *User = I->user_back();
16416 assert(User && "A single use instruction with no uses.");
16417
16418 if (User->getOpcode() != Instruction::FSub &&
16419 User->getOpcode() != Instruction::FAdd)
16420 return true;
16421
16422 const TargetOptions &Options = getTargetMachine().Options;
16423 const Function *F = I->getFunction();
16424 const DataLayout &DL = F->getParent()->getDataLayout();
16425 Type *Ty = User->getOperand(0)->getType();
16426
16427 return !(
16428 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16429 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16430 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16431}
16432
16433const MCPhysReg *
16435 // LR is a callee-save register, but we must treat it as clobbered by any call
16436 // site. Hence we include LR in the scratch registers, which are in turn added
16437 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16438 // to CTR, which is used by any indirect call.
16439 static const MCPhysReg ScratchRegs[] = {
16440 PPC::X12, PPC::LR8, PPC::CTR8, 0
16441 };
16442
16443 return ScratchRegs;
16444}
16445
16446 Register PPCTargetLowering::getExceptionPointerRegister(
16447 const Constant *PersonalityFn) const {
16448 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16449}
16450
16451 Register PPCTargetLowering::getExceptionSelectorRegister(
16452 const Constant *PersonalityFn) const {
16453 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16454}
16455
16456bool
16457 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16458 EVT VT, unsigned DefinedValues) const {
16459 if (VT == MVT::v2i64)
16460 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16461
16462 if (Subtarget.hasVSX() || Subtarget.hasQPX())
16463 return true;
16464
16465 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16466}
16467
16468 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16469 if (DisableILPPref || Subtarget.enableMachineScheduler())
16470 return TargetLowering::getSchedulingPreference(N);
16471
16472 return Sched::ILP;
16473 }
16474
16475// Create a fast isel object.
16476FastISel *
16478 const TargetLibraryInfo *LibInfo) const {
16479 return PPC::createFastISel(FuncInfo, LibInfo);
16480}
16481
16482// 'Inverted' means the FMA opcode after negating one multiplicand.
16483// For example, (fma -a b c) = (fnmsub a b c)
16484static unsigned invertFMAOpcode(unsigned Opc) {
16485 switch (Opc) {
16486 default:
16487 llvm_unreachable("Invalid FMA opcode for PowerPC!");
16488 case ISD::FMA:
16489 return PPCISD::FNMSUB;
16490 case PPCISD::FNMSUB:
16491 return ISD::FMA;
16492 }
16493}
16494
16495 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16496 bool LegalOps, bool OptForSize,
16497 NegatibleCost &Cost,
16498 unsigned Depth) const {
16499 if (Depth > SelectionDAG::MaxRecursionDepth)
16500 return SDValue();
16501
16502 unsigned Opc = Op.getOpcode();
16503 EVT VT = Op.getValueType();
16504 SDNodeFlags Flags = Op.getNode()->getFlags();
16505
16506 switch (Opc) {
16507 case PPCISD::FNMSUB:
16508 // TODO: QPX subtarget is deprecated. No transformation here.
16509 if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
16510 break;
16511
16512 const TargetOptions &Options = getTargetMachine().Options;
16513 SDValue N0 = Op.getOperand(0);
16514 SDValue N1 = Op.getOperand(1);
16515 SDValue N2 = Op.getOperand(2);
16516 SDLoc Loc(Op);
16517
16518 NegatibleCost N2Cost = NegatibleCost::Expensive;
16519 SDValue NegN2 =
16520 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16521
16522 if (!NegN2)
16523 return SDValue();
16524
16525 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16526 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16527 // These transformations may change sign of zeroes. For example,
16528 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16529 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16530 // Try and choose the cheaper one to negate.
16531 NegatibleCost N0Cost = NegatibleCost::Expensive;
16532 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16533 N0Cost, Depth + 1);
16534
16535 NegatibleCost N1Cost = NegatibleCost::Expensive;
16536 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16537 N1Cost, Depth + 1);
16538
16539 if (NegN0 && N0Cost <= N1Cost) {
16540 Cost = std::min(N0Cost, N2Cost);
16541 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16542 } else if (NegN1) {
16543 Cost = std::min(N1Cost, N2Cost);
16544 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16545 }
16546 }
16547
16548 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16549 if (isOperationLegal(ISD::FMA, VT)) {
16550 Cost = N2Cost;
16551 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16552 }
16553
16554 break;
16555 }
16556
16557 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16558 Cost, Depth);
16559}
16560
16561// Override to enable LOAD_STACK_GUARD lowering on Linux.
16562 bool PPCTargetLowering::useLoadStackGuardNode() const {
16563 if (!Subtarget.isTargetLinux())
16564 return TargetLowering::useLoadStackGuardNode();
16565 return true;
16566}
16567
16568// Override to disable global variable loading on Linux.
16569 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16570 if (!Subtarget.isTargetLinux())
16571 return TargetLowering::insertSSPDeclarations(M);
16572}
16573
16574 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16575 bool ForCodeSize) const {
16576 if (!VT.isSimple() || !Subtarget.hasVSX())
16577 return false;
16578
16579 switch(VT.getSimpleVT().SimpleTy) {
16580 default:
16581 // For FP types that are currently not supported by PPC backend, return
16582 // false. Examples: f16, f80.
16583 return false;
16584 case MVT::f32:
16585 case MVT::f64:
16586 if (Subtarget.hasPrefixInstrs()) {
16587 // With prefixed instructions, we can materialize anything that can be
16588 // represented with a 32-bit immediate, not just positive zero.
16589 APFloat APFloatOfImm = Imm;
16590 return convertToNonDenormSingle(APFloatOfImm);
16591 }
16592 LLVM_FALLTHROUGH;
16593 case MVT::ppcf128:
16594 return Imm.isPosZero();
16595 }
16596}
16597
16598// For vector shift operation op, fold
16599// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
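// e.g. (shl v4i32:x, (and y, 31)) becomes (PPCISD::SHL x, y), since the
// hardware already interprets each shift amount modulo the element width.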
16600 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16601 SelectionDAG &DAG) {
16602 SDValue N0 = N->getOperand(0);
16603 SDValue N1 = N->getOperand(1);
16604 EVT VT = N0.getValueType();
16605 unsigned OpSizeInBits = VT.getScalarSizeInBits();
16606 unsigned Opcode = N->getOpcode();
16607 unsigned TargetOpcode;
16608
16609 switch (Opcode) {
16610 default:
16611 llvm_unreachable("Unexpected shift operation");
16612 case ISD::SHL:
16613 TargetOpcode = PPCISD::SHL;
16614 break;
16615 case ISD::SRL:
16616 TargetOpcode = PPCISD::SRL;
16617 break;
16618 case ISD::SRA:
16619 TargetOpcode = PPCISD::SRA;
16620 break;
16621 }
16622
16623 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16624 N1->getOpcode() == ISD::AND)
16625 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16626 if (Mask->getZExtValue() == OpSizeInBits - 1)
16627 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16628
16629 return SDValue();
16630}
16631
16632SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16633 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16634 return Value;
16635
16636 SDValue N0 = N->getOperand(0);
16637 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16638 if (!Subtarget.isISA3_0() ||
16639 N0.getOpcode() != ISD::SIGN_EXTEND ||
16640 N0.getOperand(0).getValueType() != MVT::i32 ||
16641 CN1 == nullptr || N->getValueType(0) != MVT::i64)
16642 return SDValue();
16643
16644 // We can't save an operation here if the value is already extended, and
16645 // the existing shift is easier to combine.
16646 SDValue ExtsSrc = N0.getOperand(0);
16647 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16648 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16649 return SDValue();
16650
16651 SDLoc DL(N0);
16652 SDValue ShiftBy = SDValue(CN1, 0);
16653 // We want the shift amount to be i32 on the extswli, but the shift could
16654 // have an i64.
16655 if (ShiftBy.getValueType() == MVT::i64)
16656 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16657
16658 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16659 ShiftBy);
16660}
16661
16662SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16663 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16664 return Value;
16665
16666 return SDValue();
16667}
16668
16669SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16670 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16671 return Value;
16672
16673 return SDValue();
16674}
16675
16676// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16677// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16678// When C is zero, the equation (addi Z, -C) can be simplified to Z
16679// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
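// Sketch for C = 7: (add X, (zext (setne Z, 7))) becomes
// addi t, Z, -7 ; addic t, t, -1 ; addze X -- the carry is set exactly
// when t is nonzero, replacing a compare/select sequence.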
16680 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16681 const PPCSubtarget &Subtarget) {
16682 if (!Subtarget.isPPC64())
16683 return SDValue();
16684
16685 SDValue LHS = N->getOperand(0);
16686 SDValue RHS = N->getOperand(1);
16687
16688 auto isZextOfCompareWithConstant = [](SDValue Op) {
16689 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16690 Op.getValueType() != MVT::i64)
16691 return false;
16692
16693 SDValue Cmp = Op.getOperand(0);
16694 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16695 Cmp.getOperand(0).getValueType() != MVT::i64)
16696 return false;
16697
16698 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16699 int64_t NegConstant = 0 - Constant->getSExtValue();
16700 // Due to the limitations of the addi instruction,
16701 // -C is required to be [-32768, 32767].
16702 return isInt<16>(NegConstant);
16703 }
16704
16705 return false;
16706 };
16707
16708 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16709 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16710
16711 // If there is a pattern, canonicalize a zext operand to the RHS.
16712 if (LHSHasPattern && !RHSHasPattern)
16713 std::swap(LHS, RHS);
16714 else if (!LHSHasPattern && !RHSHasPattern)
16715 return SDValue();
16716
16717 SDLoc DL(N);
16718 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16719 SDValue Cmp = RHS.getOperand(0);
16720 SDValue Z = Cmp.getOperand(0);
16721 auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16722
16723 assert(Constant && "Constant Should not be a null pointer.");
16724 int64_t NegConstant = 0 - Constant->getSExtValue();
16725
16726 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16727 default: break;
16728 case ISD::SETNE: {
16729 // when C == 0
16730 // --> addze X, (addic Z, -1).carry
16731 // /
16732 // add X, (zext(setne Z, C))--
16733 // \ when -32768 <= -C <= 32767 && C != 0
16734 // --> addze X, (addic (addi Z, -C), -1).carry
16735 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16736 DAG.getConstant(NegConstant, DL, MVT::i64));
16737 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16738 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16739 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16740 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16741 SDValue(Addc.getNode(), 1));
16742 }
16743 case ISD::SETEQ: {
16744 // when C == 0
16745 // --> addze X, (subfic Z, 0).carry
16746 // /
16747 // add X, (zext(sete Z, C))--
16748 // \ when -32768 <= -C <= 32767 && C != 0
16749 // --> addze X, (subfic (addi Z, -C), 0).carry
16750 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16751 DAG.getConstant(NegConstant, DL, MVT::i64));
16752 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16753 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16754 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16755 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16756 SDValue(Subc.getNode(), 1));
16757 }
16758 }
16759
16760 return SDValue();
16761}
16762
16763// Transform
16764// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16765// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16766// In this case both C1 and C2 must be known constants.
16767// C1+C2 must fit into a 34 bit signed integer.
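// e.g. (add 16, (MAT_PCREL_ADDR foo+8)) folds to (MAT_PCREL_ADDR foo+24),
// a single prefixed paddi, as long as 24 fits in 34 signed bits.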
16768 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16769 const PPCSubtarget &Subtarget) {
16770 if (!Subtarget.isUsingPCRelativeCalls())
16771 return SDValue();
16772
16773 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16774 // If we find that node try to cast the Global Address and the Constant.
16775 SDValue LHS = N->getOperand(0);
16776 SDValue RHS = N->getOperand(1);
16777
16778 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16779 std::swap(LHS, RHS);
16780
16781 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16782 return SDValue();
16783
16784 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16785 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16786 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
16787
16788 // Check that both casts succeeded.
16789 if (!GSDN || !ConstNode)
16790 return SDValue();
16791
16792 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16793 SDLoc DL(GSDN);
16794
16795 // The signed int offset needs to fit in 34 bits.
16796 if (!isInt<34>(NewOffset))
16797 return SDValue();
16798
16799 // The new global address is a copy of the old global address except
16800 // that it has the updated Offset.
16801 SDValue GA =
16802 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16803 NewOffset, GSDN->getTargetFlags());
16804 SDValue MatPCRel =
16805 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16806 return MatPCRel;
16807}
16808
16809SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16810 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16811 return Value;
16812
16813 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16814 return Value;
16815
16816 return SDValue();
16817}
16818
16819// Detect TRUNCATE operations on bitcasts of float128 values.
16820 // What we are looking for here is the situation where we extract a subset
16821// of bits from a 128 bit float.
16822// This can be of two forms:
16823// 1) BITCAST of f128 feeding TRUNCATE
16824// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16825// The reason this is required is because we do not have a legal i128 type
16826// and so we want to prevent having to store the f128 and then reload part
16827// of it.
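// Sketch: (i64 (truncate (srl (bitcast f128 x to i128), 64))) is turned
// into an EXTRACT_VECTOR_ELT of (bitcast x to v2i64), with the element
// index chosen by endianness, avoiding a store/reload of the f128.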
16828SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16829 DAGCombinerInfo &DCI) const {
16830 // If we are using CRBits then try that first.
16831 if (Subtarget.useCRBits()) {
16832 // Check if CRBits did anything and return that if it did.
16833 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16834 return CRTruncValue;
16835 }
16836
16837 SDLoc dl(N);
16838 SDValue Op0 = N->getOperand(0);
16839
16840 // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16841 if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16842 EVT VT = N->getValueType(0);
16843 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16844 return SDValue();
16845 SDValue Sub = Op0.getOperand(0);
16846 if (Sub.getOpcode() == ISD::SUB) {
16847 SDValue SubOp0 = Sub.getOperand(0);
16848 SDValue SubOp1 = Sub.getOperand(1);
16849 if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16850 (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16851 return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16852 SubOp1.getOperand(0),
16853 DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16854 }
16855 }
16856 }
16857
16858 // Looking for a truncate of i128 to i64.
16859 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16860 return SDValue();
16861
16862 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16863
16864 // SRL feeding TRUNCATE.
16865 if (Op0.getOpcode() == ISD::SRL) {
16866 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16867 // The right shift has to be by 64 bits.
16868 if (!ConstNode || ConstNode->getZExtValue() != 64)
16869 return SDValue();
16870
16871 // Switch the element number to extract.
16872 EltToExtract = EltToExtract ? 0 : 1;
16873 // Update Op0 past the SRL.
16874 Op0 = Op0.getOperand(0);
16875 }
16876
16877 // BITCAST feeding a TRUNCATE possibly via SRL.
16878 if (Op0.getOpcode() == ISD::BITCAST &&
16879 Op0.getValueType() == MVT::i128 &&
16880 Op0.getOperand(0).getValueType() == MVT::f128) {
16881 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16882 return DCI.DAG.getNode(
16883 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16884 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16885 }
16886 return SDValue();
16887}
16888
16889SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16890 SelectionDAG &DAG = DCI.DAG;
16891
16893 if (!ConstOpOrElement)
16894 return SDValue();
16895
16896 // An imul is usually smaller than the alternative sequence for legal type.
16897 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16898 isOperationLegal(ISD::MUL, N->getValueType(0)))
16899 return SDValue();
16900
16901 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16902 switch (this->Subtarget.getCPUDirective()) {
16903 default:
16904 // TODO: enhance the condition for subtarget before pwr8
16905 return false;
16906 case PPC::DIR_PWR8:
16907 // type mul add shl
16908 // scalar 4 1 1
16909 // vector 7 2 2
16910 return true;
16911 case PPC::DIR_PWR9:
16912 case PPC::DIR_PWR10:
16913 case PPC::DIR_PWR_FUTURE:
16914 // type mul add shl
16915 // scalar 5 2 2
16916 // vector 7 2 2
16917
16918 // The cycle ratios of the related operations are shown in the table above:
16919 // mul is 5 (scalar) / 7 (vector) cycles, while add/sub/shl are all 2 for
16920 // both scalar and vector types. For the 2-instruction patterns, add/sub +
16921 // shl cost 4 cycles, so they are always profitable; but for the
16922 // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x),
16923 // sub + add + shl cost 6, so we should only do it for vector types.
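// e.g. (mul x, 9) -> (add (shl x, 3), x) is two cheap instructions vs.
// one multiply; (mul x, -9) needs a third (the negation), which by the
// reasoning above only pays off for vector types. (Illustrative case.)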
16924 return IsAddOne && IsNeg ? VT.isVector() : true;
16925 }
16926 };
16927
16928 EVT VT = N->getValueType(0);
16929 SDLoc DL(N);
16930
16931 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16932 bool IsNeg = MulAmt.isNegative();
16933 APInt MulAmtAbs = MulAmt.abs();
16934
16935 if ((MulAmtAbs - 1).isPowerOf2()) {
16936 // (mul x, 2^N + 1) => (add (shl x, N), x)
16937 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16938
16939 if (!IsProfitable(IsNeg, true, VT))
16940 return SDValue();
16941
16942 SDValue Op0 = N->getOperand(0);
16943 SDValue Op1 =
16944 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16945 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16946 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16947
16948 if (!IsNeg)
16949 return Res;
16950
16951 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16952 } else if ((MulAmtAbs + 1).isPowerOf2()) {
16953 // (mul x, 2^N - 1) => (sub (shl x, N), x)
16954 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16955
16956 if (!IsProfitable(IsNeg, false, VT))
16957 return SDValue();
16958
16959 SDValue Op0 = N->getOperand(0);
16960 SDValue Op1 =
16961 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16962 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16963
16964 if (!IsNeg)
16965 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16966 else
16967 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16968
16969 } else {
16970 return SDValue();
16971 }
16972}
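// Worked instances of the strength reduction above (an illustrative sketch,
// assuming IsProfitable returns true for the target; not part of the
// original file):
//
//   (mul x, 9)    // MulAmtAbs - 1 = 8 = 2^3
//       ==> (add (shl x, 3), x)
//   (mul x, -9)
//       ==> (sub 0, (add (shl x, 3), x))
//   (mul x, 7)    // MulAmtAbs + 1 = 8 = 2^3
//       ==> (sub (shl x, 3), x)
//   (mul x, -7)
//       ==> (sub x, (shl x, 3))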
16973
16974// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16975// in combiner since we need to check SD flags and other subtarget features.
16976SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16977 DAGCombinerInfo &DCI) const {
16978 SDValue N0 = N->getOperand(0);
16979 SDValue N1 = N->getOperand(1);
16980 SDValue N2 = N->getOperand(2);
16981 SDNodeFlags Flags = N->getFlags();
16982 EVT VT = N->getValueType(0);
16983 SelectionDAG &DAG = DCI.DAG;
16984 const TargetOptions &Options = getTargetMachine().Options;
16985 unsigned Opc = N->getOpcode();
16986 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16987 bool LegalOps = !DCI.isBeforeLegalizeOps();
16988 SDLoc Loc(N);
16989
16990 // TODO: QPX subtarget is deprecated. No transformation here.
16991 if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
16992 return SDValue();
16993
16994 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16995 // since (fnmsub a b c)=-0 while c-ab=+0.
16996 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16997 return SDValue();
16998
16999 // (fma (fneg a) b c) => (fnmsub a b c)
17000 // (fnmsub (fneg a) b c) => (fma a b c)
17001 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17002 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17003
17004 // (fma a (fneg b) c) => (fnmsub a b c)
17005 // (fnmsub a (fneg b) c) => (fma a b c)
17006 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17007 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17008
17009 return SDValue();
17010}
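// A numeric sanity check for the folds above (illustrative; not part of the
// original file). PPC's fnmsub computes -(a*b - c), so with a = -2.0,
// b = 3.0, c = 1.0:
//
//   (fma (fneg a) b c) = 2*3 + 1     = 7.0
//   (fnmsub a b c)     = -((-6) - 1) = 7.0
//
// Negating one multiplicand and flipping FMA <-> FNMSUB preserves the value;
// only the sign of a zero result can differ, which is why the
// no-signed-zeros check above is required.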
17011
17012bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17013   // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
17014 if (!Subtarget.is64BitELFABI())
17015 return false;
17016
17017   // If this is not a tail call, there is no need to proceed.
17018 if (!CI->isTailCall())
17019 return false;
17020
17021   // If sibling calls have been disabled and tail-calls aren't guaranteed,
17022   // there is no reason to duplicate.
17023 auto &TM = getTargetMachine();
17024 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17025 return false;
17026
17027 // Can't tail call a function called indirectly, or if it has variadic args.
17028 const Function *Callee = CI->getCalledFunction();
17029 if (!Callee || Callee->isVarArg())
17030 return false;
17031
17032 // Make sure the callee and caller calling conventions are eligible for tco.
17033 const Function *Caller = CI->getParent()->getParent();
17034 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17035 CI->getCallingConv()))
17036 return false;
17037
17038   // If the function is local, then we have a good chance of tail-calling it.
17039 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17040}
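// An example of a call site that passes every check above (an illustrative
// LLVM IR sketch with hypothetical function names; not part of the original
// file). On a 64-bit ELF target:
//
//   define internal i32 @callee(i32 %x) { ... }
//
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)  ; direct, non-vararg, same CC,
//     ret i32 %r                          ; DSO-local callee
//   }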
17041
17042bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17043 if (!Subtarget.hasVSX())
17044 return false;
17045 if (Subtarget.hasP9Vector() && VT == MVT::f128)
17046 return true;
17047 return VT == MVT::f32 || VT == MVT::f64 ||
17048 VT == MVT::v4f32 || VT == MVT::v2f64;
17049}
17050
17051bool PPCTargetLowering::
17052isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17053 const Value *Mask = AndI.getOperand(1);
17054 // If the mask is suitable for andi. or andis. we should sink the and.
17055 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17056     // Can't handle constants wider than 64 bits.
17057 if (CI->getBitWidth() > 64)
17058 return false;
17059 int64_t ConstVal = CI->getZExtValue();
17060 return isUInt<16>(ConstVal) ||
17061 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17062 }
17063
17064 // For non-constant masks, we can always use the record-form and.
17065 return true;
17066}
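// Examples of the constants accepted above (illustrative; not part of the
// original file). andi. takes a 16-bit unsigned immediate and andis. applies
// the same immediate to the upper halfword, so:
//
//   0x0000FFFF -> true   (low halfword only: andi.)
//   0xFFFF0000 -> true   (high halfword only: andis.)
//   0x00FF00FF -> false  (needs bits in both halfwords)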
17067
17068// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17069// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17070// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17071// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17072// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
17073SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
17074 assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
17075 assert(Subtarget.hasP9Altivec() &&
17076 "Only combine this when P9 altivec supported!");
17077 EVT VT = N->getValueType(0);
17078 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17079 return SDValue();
17080
17081 SelectionDAG &DAG = DCI.DAG;
17082 SDLoc dl(N);
17083 if (N->getOperand(0).getOpcode() == ISD::SUB) {
17084     // Even for signed integers, the subtraction result is known to be
17085     // positive (as a signed integer) when both inputs are zero-extended.
17086 unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
17087 unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
17088     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
17089          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
17090         (SubOpcd1 == ISD::ZERO_EXTEND ||
17091          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
17092 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17093 N->getOperand(0)->getOperand(0),
17094 N->getOperand(0)->getOperand(1),
17095 DAG.getTargetConstant(0, dl, MVT::i32));
17096 }
17097
17098 // For type v4i32, it can be optimized with xvnegsp + vabsduw
17099 if (N->getOperand(0).getValueType() == MVT::v4i32 &&
17100 N->getOperand(0).hasOneUse()) {
17101 return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17102 N->getOperand(0)->getOperand(0),
17103 N->getOperand(0)->getOperand(1),
17104 DAG.getTargetConstant(1, dl, MVT::i32));
17105 }
17106 }
17107
17108 return SDValue();
17109}
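// Illustrative before/after for this combine (not part of the original
// file), using the file's own DAG notation:
//
//   (abs (sub (zext a), (zext b)))   ==>  (vabsd a, b, 0)
//     // zero-extended inputs make the subtraction's sign known
//   (abs (sub a, b))  [v4i32 only]   ==>  (vabsd a, b, 1)
//     // the flag of 1 requests the xvnegsp bias before vabsduw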
17110
17111 // For type v4i32/v8i16/v16i8, transform
17112// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17113// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17114// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17115// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
17116SDValue PPCTargetLowering::combineVSelect(SDNode *N,
17117 DAGCombinerInfo &DCI) const {
17118 assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
17119 assert(Subtarget.hasP9Altivec() &&
17120 "Only combine this when P9 altivec supported!");
17121
17122 SelectionDAG &DAG = DCI.DAG;
17123 SDLoc dl(N);
17124 SDValue Cond = N->getOperand(0);
17125 SDValue TrueOpnd = N->getOperand(1);
17126 SDValue FalseOpnd = N->getOperand(2);
17127 EVT VT = N->getOperand(1).getValueType();
17128
17129 if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
17130 FalseOpnd.getOpcode() != ISD::SUB)
17131 return SDValue();
17132
17133   // VABSD is only available for types v4i32/v8i16/v16i8.
17134 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17135 return SDValue();
17136
17137   // Require at least one single-use operand, so the combine saves a
17138 if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17139 return SDValue();
17140
17141 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17142
17143 // Can only handle unsigned comparison here
17144 switch (CC) {
17145 default:
17146 return SDValue();
17147 case ISD::SETUGT:
17148 case ISD::SETUGE:
17149 break;
17150 case ISD::SETULT:
17151   case ISD::SETULE:
17152     std::swap(TrueOpnd, FalseOpnd);
17153 break;
17154 }
17155
17156 SDValue CmpOpnd1 = Cond.getOperand(0);
17157 SDValue CmpOpnd2 = Cond.getOperand(1);
17158
17159 // SETCC CmpOpnd1 CmpOpnd2 cond
17160 // TrueOpnd = CmpOpnd1 - CmpOpnd2
17161 // FalseOpnd = CmpOpnd2 - CmpOpnd1
17162 if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17163 TrueOpnd.getOperand(1) == CmpOpnd2 &&
17164 FalseOpnd.getOperand(0) == CmpOpnd2 &&
17165 FalseOpnd.getOperand(1) == CmpOpnd1) {
17166     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17167                        CmpOpnd1, CmpOpnd2,
17168 DAG.getTargetConstant(0, dl, MVT::i32));
17169 }
17170
17171 return SDValue();
17172}
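// Illustrative before/after for this combine (not part of the original
// file): for v4i32 vectors a and b,
//
//   (vselect (setcc a, b, setugt), (sub a, b), (sub b, a))
//
// selects a-b when a > b (unsigned) and b-a otherwise, i.e. the unsigned
// absolute difference of a and b, which is exactly (vabsd a, b, 0). The
// setult/setule forms swap the true/false operands first, as handled above.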
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
SmallVector< MachineOperand, 4 > Cond
basic Basic Alias true
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:26
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition Debug.h:122
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
lazy value info
#define F(x, y, z)
Definition MD5.cpp:56
#define I(x, y, z)
Definition MD5.cpp:59
#define G(x, y, z)
Definition MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static const MCPhysReg QFPR[]
QFPR - The set of QPX registers that should be allocated for arguments.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs, bool HasQPX)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG)
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64)
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, const PPCSubtarget &Subtarget, bool isPatchPoint)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
uint32_t Size
Definition Profile.cpp:46
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSplat(ArrayRef< Value * > VL)
Shadow Stack GC Lowering
static bool Enabled
Definition Statistic.cpp:50
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:196
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:421
static bool is64Bit(const char *name)
bool isPosZero() const
Definition APFloat.h:1214
Class for arbitrary precision integers.
Definition APInt.h:69
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition APInt.h:566
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:468
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition APInt.h:666
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition APInt.h:654
This class represents an incoming formal argument to a Function.
Definition Argument.h:29
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:107
The address of a basic block.
Definition Constants.h:850
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:271
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
This is the shared class of boolean and integer constants.
Definition Constants.h:77
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:111
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:233
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:647
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.h:340
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:644
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:219
const Function & getFunction() const
Definition Function.h:135
arg_iterator arg_begin()
Definition Function.h:720
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:252
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.h:330
const GlobalObject * getBaseObject() const
Definition Globals.cpp:463
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:228
StringRef getSection() const
Definition Globals.cpp:162
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
bool hasComdat() const
const BasicBlock * getParent() const
Definition Instruction.h:94
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
This class is used to represent ISD::LOAD nodes.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition LoopInfo.h:143
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition LoopInfo.h:96
block_iterator block_end() const
Definition LoopInfo.h:167
block_iterator block_begin() const
Definition LoopInfo.h:166
Context object for machine code objects.
Definition MCContext.h:67
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:22
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:384
StringRef getSymbolTableName() const
void setRepresentedCsect(MCSectionXCOFF *C)
bool hasRepresentedCsectSet() const
Machine Value Type.
TypeSize getScalarSizeInBits() const
static mvt_range fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static mvt_range integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range fp_valuetypes()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
const SDValue & getBasePtr() const
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
bool useLongCalls() const
bool hasFRSQRTE() const
bool hasQPX() const
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
bool hasFPCVT() const
bool isAIXABI() const
bool useSoftFloat() const
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
bool hasAltivec() const
bool allowsUnalignedFPAccess() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
bool hasFSQRT() const
bool hasP9Vector() const
bool hasFRE() const
bool hasFRSQRTES() const
MCRegister getEnvironmentPointerRegister() const
const PPCInstrInfo * getInstrInfo() const override
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
bool hasRecipPrec() const
bool hasSTFIWX() const
bool isSVR4ABI() const
bool hasInvariantFunctionDescriptors() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool hasPrefixInstrs() const
bool hasPartwordAtomics() const
bool hasSPE() const
bool hasLFIWAX() const
bool isLittleEndian() const
bool hasFCPSGN() const
bool isTargetLinux() const
bool hasP9Altivec() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
bool is64BitELFABI() const
bool hasFPRND() const
bool isELFv2ABI() const
bool hasP8Vector() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
bool hasLDBRX() const
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
bool isISA3_0() const
bool hasVSX() const
bool hasDirectMove() const
bool hasP8Altivec() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isMulhCheaperThanMulShift(EVT Type) const override
isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific type is cheaper than a mul...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always beneficiates from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const SDNodeFlags getFlags() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts)
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, int64_t Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset is a wrapper around scalable and non-scalable offsets and is used in several functions su...
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:57
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:511
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition StringRef.h:160
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:152
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
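A sketch of the idiom as it would appear inside a hypothetical TargetLowering subclass constructor (the enumerators Expand and Custom are members of LegalizeAction):

    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); // legalizer expands it
    setOperationAction(ISD::BSWAP, MVT::v4i32, Custom);   // LowerOperation handles it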
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isMulhCheaperThanMulShift(EVT Type) const
Return true if a mulh[s|u] node for a specific type is cheaper than a multiply followed by a shift.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:80
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:231
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:147
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:170
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:150
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:198
static Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:174
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:169
unsigned getNumOperands() const
Definition User.h:191
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:244
User * user_back()
Definition Value.h:404
StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:270
Implementation for an ilist node.
Definition ilist_node.h:39
self_iterator getIterator()
Definition ilist_node.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:620
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:220
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition ISDOpcodes.h:903
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition ISDOpcodes.h:899
@ TargetConstantPool
Definition ISDOpcodes.h:152
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to ...
Definition ISDOpcodes.h:726
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:131
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:445
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:234
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:585
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition ISDOpcodes.h:932
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:253
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:223
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:817
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:650
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:431
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:183
@ GlobalAddress
Definition ISDOpcodes.h:71
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:657
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:484
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:342
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:559
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:239
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:754
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:744
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:213
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition ISDOpcodes.h:970
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:363
@ GlobalTLSAddress
Definition ISDOpcodes.h:72
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:644
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:401
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:528
@ TargetExternalSymbol
Definition ISDOpcodes.h:153
@ BR
Control flow instructions. These all have token chains.
Definition ISDOpcodes.h:833
@ TargetJumpTable
Definition ISDOpcodes.h:151
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition ISDOpcodes.h:988
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition ISDOpcodes.h:811
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:762
@ BR_CC
BR_CC - Conditional branch.
Definition ISDOpcodes.h:854
@ BR_JT
BR_JT - Jumptable branch.
Definition ISDOpcodes.h:842
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:310
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:597
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition ISDOpcodes.h:997
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:206
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition ISDOpcodes.h:928
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:148
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:540
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:576
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:520
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:511
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition ISDOpcodes.h:801
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:476
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:647
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:612
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition ISDOpcodes.h:794
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition ISDOpcodes.h:827
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:665
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:545
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:729
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:606
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:400
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:113
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:87
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:394
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:416
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:393
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:703
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition ISDOpcodes.h:959
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:421
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:551
@ TRAP
TRAP - Trapping instruction.
Definition ISDOpcodes.h:979
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:168
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:263
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:352
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:465
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:717
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:698
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:374
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:125
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:653
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition ISDOpcodes.h:923
@ BRCOND
BRCOND - Conditional branch.
Definition ISDOpcodes.h:848
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:633
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:59
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:441
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:301
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition ISDOpcodes.h:917
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition ISDOpcodes.h:976
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:176
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:149
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:456
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
@ VecShuffle
Definition NVPTX.h:69
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:103
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition PPC.h:108
@ MO_TPREL_HA
Definition PPC.h:118
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition PPC.h:95
@ MO_TLS
Definition PPC.h:127
@ MO_TPREL_LO
Definition PPC.h:117
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:114
@ MO_HA
Definition PPC.h:115
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:99
@ QBFLT
QBFLT = Access the underlying QPX floating-point boolean representation.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ QVGPCI
QVGPCI = This corresponds to the QPX qvgpci instruction.
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ QVFPERM
QVFPERM = This corresponds to the QPX qvfperm instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ VCMPo
RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the altivec VCMP*o instructions.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ QVLFSb
QBRC, CHAIN = QVLFSb CHAIN, Ptr The 4xf32 load used for v4i1 constants.
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed block by block, and each block is probed.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ GlobalBaseReg
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry,...
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ QVALIGNI
QVALIGNI = This corresponds to the QPX qvaligni instruction.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ QVESPLATI
QVESPLATI = This corresponds to the QPX qvesplati instruction.
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
int isQVALIGNIShuffleMask(SDNode *N)
If this is a qvaligni shuffle mask, return the shift amount, otherwise return -1.
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ XMC_PR
Program Code.
Definition XCOFF.h:38
@ XTY_ER
External reference.
Definition XCOFF.h:174
constexpr double e
Definition MathExtras.h:58
This class represents lattice values for constants.
constexpr bool isUInt< 16 >(uint64_t x)
Definition MathExtras.h:409
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:148
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
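A sketch of the typical caller pattern, with N standing in for some operand node:

    int16_t Imm;
    if (isIntS16Immediate(N, Imm)) {
      // N is a constant whose value fits the signed 16-bit displacement
      // field of a D-form load/store, so no extra lis/addi is required.
    }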
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAcquireOrStronger(AtomicOrdering ao)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:497
constexpr bool isInt< 16 >(int64_t x)
Definition MathExtras.h:371
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition MathExtras.h:664
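For example, assuming the usual LLVM support headers:

    uint32_t Bits = llvm::FloatToBits(1.0f);
    assert(Bits == 0x3F800000u && "IEEE-754 single-precision encoding of 1.0");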
unsigned M1(unsigned Val)
Definition VE.h:353
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1498
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition MathExtras.h:696
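A tiny worked example combining this with isPowerOf2_64 above:

    uint64_t Floor = llvm::PowerOf2Floor(20); // yields 16
    assert(llvm::isPowerOf2_64(Floor));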
const NoneType None
Definition None.h:23
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:492
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition STLExtras.h:1335
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition MathExtras.h:157
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:132
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition ArrayRef.h:458
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:140
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
@ Z
zlib-style compression
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:158
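For example:

    // Round a 13-byte object up to the next 8-byte boundary.
    uint64_t Slot = llvm::alignTo(13, llvm::Align(8)); // yields 16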
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1567
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition MathExtras.h:673
unsigned M0(unsigned Val)
Definition VE.h:352
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:762
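For example, treating the low 16 bits of a value as a signed quantity:

    int32_t A = llvm::SignExtend32<16>(0xFFFFu); // -1
    int32_t B = llvm::SignExtend32<16>(0x7FFFu); // 32767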
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
bool isReleaseOrStronger(AtomicOrdering ao)
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:778
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:962
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:163
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:190
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:178
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getScalarSizeInBits() const
Definition ValueTypes.h:321
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:331
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:131
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:141
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:315
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:260
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:156
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:267
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:272
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:151
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:280
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:146
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:65
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
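A hedged sketch of a typical use, with DAG, dl, Chain, MF, FI, and PtrVT all assumed to be in scope in some lowering routine:

    // Reload a value spilled to frame index FI; tagging the memory operand
    // via getFixedStack lets alias analysis reason about the stack slot.
    SDValue Ptr = DAG.getFrameIndex(FI, PtrVT);
    SDValue Ld = DAG.getLoad(MVT::i64, dl, Chain, Ptr,
                             MachinePointerInfo::getFixedStack(MF, FI));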
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:119
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs